R/toposearch.R

Defines functions unify_vertex_names unify_vertex_names_rec get_outpop get_rootname get_root get_leaves2 get_leaves get_leafnames edges_to_igraph is_simplified is_valid numadmix doublefactorial numadmixplacements numtreesadmix numdags catalan numtrees

Documented in edges_to_igraph get_leafnames get_outpop get_rootname is_valid numadmix

# number of all binary trees with a given number of leaf nodes (== doublefactorial(2*n-3))
numtrees = function(n) factorial(2*n-2)/(2^(n-1)*factorial(n-1))

# Catalan number
catalan = function(n) factorial(2*n)/factorial(n+1)/factorial(n)

# number of possible DAGs
numdags = function(n) {
  if(n <= 1) return(1)
  sum(sapply(1:n, function(k) (-1)^(k+1) * choose(n, k) * 2^(k*(n-k)) * numdags(n-k)))
}

# number of trees with x admixture events
numtreesadmix = function(n, nadmix) numtrees(n) * numadmixplacements(2*n-2, nadmix)

# number of unique ways to add 'nadmix' undirected edges; each added edge increases the number of edges by 3
numadmixplacements = function(numedges, nadmix) {
  if(nadmix == 0) return(1)
  choose(numedges, 2) * numadmixplacements(numedges+3, nadmix-1)
}


doublefactorial = function(n) {
  if(n %% 2 == 0) {
    k = n/2
    out = 2^k*factorial(k)
  } else {
    k = (n+1)/2
    out = factorial(2*k-1)/(2^(k-1)*factorial(k-1))
  }
  out
}

#' Count number of admixture nodes
#'
#' @export
#' @param graph An admixture graph
#' @return Number of admixture nodes
numadmix = function(graph) {
  sum(degree(graph, mode='in') == 2)
}

#' Test if an admixture graph is valid
#'
#' @export
#' @param graph An admixture graph
#' @return `TRUE` if graph is valid, otherwise `FALSE`
is_valid = function(graph) {

  indegree = igraph::degree(graph, mode = 'in')
  outdegree = igraph::degree(graph, mode = 'out')

  igraph::is_simple(graph) &&
    igraph::is_connected(graph) &&
    igraph::is_dag(graph) &&
    sum(indegree == 0) == 1 &&
    all(indegree <= 2) &&
    all(outdegree <= 2) &&
    !any(outdegree == 0 & indegree != 1) &&
    !any(indegree == 0 & outdegree < 2)
}

is_simplified = function(graph) {
  indeg = degree(graph, mode = 'in')
  outdeg = degree(graph, mode = 'out')
  sum(indeg == 1 & outdeg == 1) == 0 && max(indeg + outdeg) == 3 && igraph::is_simple(graph)
}

#' Convert data frame graph to igraph
#'
#' @export
#' @param edges An admixture graph as an edge list data frame
#' @return An `igraph` object
edges_to_igraph = function(edges) {
  edges[,1:2] %>% as.matrix %>% igraph::graph_from_edgelist()
}

#' Get the population names of a graph
#'
#' @export
#' @param graph An admixture graph
#' @return Population names
get_leafnames = function(graph) {
  graph %>% V %>% {names(which(degree(graph, ., mode='out') == 0))}
}

get_leaves = function(graph) {
  graph %>% V %>% {.[degree(graph, ., mode='out') == 0]}
}

get_leaves2 = function(graph) {
  # uses 'subcomponent' to return leaves in a consistent order
  root = get_root(graph)
  graph %>% subcomponent(root, mode='out') %>% igraph::intersection(get_leaves(graph))
}


get_root = function(graph) {
  root = V(graph)[igraph::degree(graph, mode = 'in') == 0]
  #if(length(root) != 1) stop(paste0('Root problem ', paste0(names(root), collapse = ' ')))
  root
}

#' Get the root name
#'
#' @export
#' @param graph An admixture graph
#' @return Root name
get_rootname = function(graph) {
  names(get_root(graph))
}

#' Get the outgroup from a graph (if it exists)
#'
#' @export
#' @param graph An admixture graph
#' @return Outgroup name
get_outpop = function(graph) {
  # returns outpop, if there is an edge from the root to a leave, NULL otherwise
  #graph %>% V %>% names %>% pluck(2)
  stopifnot(class(graph) == 'igraph')
  root = get_root(graph)
  outpop = intersect(names(igraph::neighbors(graph, root)), get_leafnames(graph))
  if(length(outpop) == 1) return(outpop)
}

unify_vertex_names_rec = function(graph, node, vnamemap, sep1 = '.', sep2 = '_') {
  # recursive function which returns a vector of new, unique node names for all nodes which are reachable from 'node'
  children = neighbors(graph, node, mode='out')
  oldnam = names(node)
  if(length(children) == 0) {
    vnamemap[oldnam] = oldnam
    return(vnamemap)
  } else if(length(children) == 2) {
    leaves = get_leafnames(graph)
    c1 = igraph::distances(graph, children[1], leaves) %>% paste(collapse='')
    c2 = igraph::distances(graph, children[2], leaves) %>% paste(collapse='')
    if(c1 < c2) children %<>% rev
  }

  newnam = rep(NA, length(children))
  for(i in seq_along(children)) {
    vnamemap = unify_vertex_names_rec(graph, children[i], vnamemap, sep1 = sep1, sep2 = sep2)
    newnam[i] = vnamemap[names(children[i])]
  }
  stopifnot(all(!is.na(newnam)))
  vnamemap[oldnam] = paste0(sort(newnam), sep1, collapse = sep2)
  vnamemap
}


unify_vertex_names = function(graph, keep_unique = TRUE, sep1 = '.', sep2 = '_') {
  # this function changes the vertex names of inner vertices,
  # so that all isomorphic graphs with the same leaf nodes have equally labelled inner nodes
  leaves = get_leaves(graph)
  root = get_root(graph)
  lv = shortest_unique_prefixes(names(leaves))
  g = set_vertex_attr(graph, 'name', leaves, lv)
  ormap = names(V(g))
  names(ormap) = ormap

  nammap = unify_vertex_names_rec(g, root, ormap, sep1 = sep1, sep2 = sep2)
  nammap[lv] = names(leaves)
  names(nammap)[match(lv, names(nammap))] = names(leaves)
  nammap[names(root)] = 'R'
  if(!keep_unique) {
    changed = setdiff(names(nammap), c(names(root), names(leaves)))
    nammap[changed] = paste0('n', as.numeric(as.factor(nammap[changed])))
  }
  nammap %<>% map_chr(digest::digest) %>% paste0('x', .)
  set_vertex_attr(graph, 'name', V(graph), nammap)
}


#' Generate a random admixture graph
#'
#' This function randomly generates an admixture graph for a given set of leaf nodes
#' @export
#' @param leaves Names of the leaf nodes, or a number specifying how many leaf nodes there should be
#' @param numadmix Number of admixture events
#' @param simple Should edges leading to admixture nodes consist of separate admix edges and normal edges
#' @param outpop Outgroup population
#' @examples
#' rand_graph = random_admixturegraph(10, numadmix = 5)
#' plot_graph(rand_graph)
random_admixturegraph = function(leaves, numadmix = 0, simple = TRUE, outpop = NULL,
                                 nonzero_f4 = NULL, admix_constraints = NULL, event_order = NULL, ntry = 100) {
  # makes a random admixture graph
  # returns an 'igraph' graph object
  # 'leaves' can be a number of leaf nodes, or a character vector of leaf names

  stopifnot(class(leaves)[1] %in% c('numeric', 'integer', 'character'))
  if(length(leaves) == 1) {
    if(leaves > length(LETTERS)) leaves = paste0('l', seq_len(leaves))
    else leaves = LETTERS[seq_len(leaves)]
  }
  for(i in seq_len(ntry)) {
    graph = leaves %>%
      setdiff(outpop) %>%
      sample %>%
      random_newick(outpop = outpop) %>%
      newick_to_edges %>%
      graph_from_edgelist %>%
      insert_admix_n(numadmix, fix_outgroup = !is.null(outpop))
    if(all(map_lgl(list(nonzero_f4, admix_constraints, event_order), is.null)) ||
       satisfies_constraints(graph, nonzero_f4 = nonzero_f4, admix_constraints = admix_constraints,
                             event_order = event_order)) break
    if(i == ntry) warning('Constraints not satisfied!')
  }

  if(!simple) graph %<>% desimplify_graph
  graph
}


simplify_graph_old = function(graph) {
  # removes redundant nodes

  if(is_simplified(graph)) return(graph)
  convmat = FALSE
  if(class(graph) == 'matrix') {
    convmat = TRUE
    graph = graph_from_edgelist(graph)
  }
  graph %<>% igraph::simplify()
  i = 0
  repeat({
    i = i+1
    redundant = which(degree(graph, mode='in') == 1 & degree(graph, mode='out') == 1)
    if(length(redundant) == 0) break
    newfrom = names((neighbors(graph, redundant[1], mode='in')))
    newto = names((neighbors(graph, redundant[1], mode='out')))
    graph %<>% igraph::delete_vertices(redundant[1])
    graph %<>% igraph::add_edges(c(newfrom, newto))
    graph %<>% igraph::simplify()
    if(i > 100) stop('sg old error')
  })
  graph %<>% X_to_H
  if(convmat) graph = igraph::as_edgelist(graph)
  graph
}

simplify_graph_old = function(graph) {
  # removes redundant nodes

  if(is_simplified(graph)) return(graph)
  convmat = FALSE
  if(class(graph) == 'matrix') {
    convmat = TRUE
    graph = graph_from_edgelist(graph)
  }
  graph %<>% igraph::simplify()
  repeat({
    redundant = which(degree(graph, mode='in') == 1 & degree(graph, mode='out') == 1)
    if(length(redundant) == 0) break
    newfrom = names(neighbors(graph, redundant[1], mode='in'))
    newto = names(neighbors(graph, redundant[1], mode='out'))
    graph %<>% igraph::delete_vertices(redundant[1]) %>% igraph::add_edges(c(newfrom, newto))
    #%>% igraph::simplify()
  })
  graph %<>% igraph::simplify() %>% X_to_H
  if(convmat) graph = igraph::as_edgelist(graph)
  graph
}


#' Remove redundant edges
#'
#' Nodes with in and out degree of one will be removed.
#' @param graph an igraph object
#' @export
#' @examples
#' simple = simplify_graph(example_igraph)
#' plot_graph(example_igraph)
#' plot_graph(simple)
simplify_graph = function(graph) {
  # removes redundant nodes

  indegree = degree(graph, mode='in')
  outdegree = degree(graph, mode='out')
  convmat = FALSE
  if(class(graph) == 'matrix') {
    convmat = TRUE
    graph = graph_from_edgelist(graph)
  }
  graph %<>% igraph::simplify()
  redundant = which(indegree == 1 & outdegree == 1)

  if(length(redundant) != 0) {
    newfrom = redundant %>% igraph::adjacent_vertices(graph, ., mode='in') %>% unlist
    #newto = redundant %>% igraph::adjacent_vertices(graph, ., mode='out') %>% unlist
    newto = redundant %>% map_dbl(~nextnonredundant(graph, .))
    graph %<>% igraph::add_edges(interleave(newfrom, newto)) %>% igraph::delete_vertices(names(redundant))
  }
  graph %<>% igraph::simplify()
  if(any(indegree > 1 & outdegree > 1)) graph %<>% X_to_H
  if(!is_simplified(graph)) graph %<>% simplify_graph()
  if(convmat) graph = igraph::as_edgelist(graph)
  graph
}


nextnonredundant = function(graph, node) {
  n = neighbors(graph, node, mode = 'out')[1]
  if(degree(graph, n, mode = 'in') != 1 || degree(graph, n, mode = 'out') != 1) return(n)
  return(nextnonredundant(graph, n))
}


#' Add two nodes before each admixture node
#'
#' This is used to revert simplify_graph.
#' @export
#' @param graph An admixture graph
#' @examples
#' simple = simplify_graph(example_igraph)
#' desimple = desimplify_graph(simple)
#' plot_graph(simple)
#' plot_graph(desimple)
desimplify_graph = function(graph) {
  # adds two nodes before each admixture node
  convmat = FALSE
  if(class(graph)[1] == 'matrix') {
    convmat = TRUE
    graph = graph_from_edgelist(graph)
  }
  ograph = graph
  admix = which(degree(graph, mode='in') == 2)
  if(length(admix) == 0) return(graph)
  parents = names(unlist(unname(lapply(admix, function(v) neighbors(graph, v, mode='in')))))
  admixnamesrep = rep(names(admix), each=2)
  newnam = newnodenam(paste0(admixnamesrep, c('a', 'b')), names(V(graph)))
  graph = igraph::add_vertices(graph, length(admix)*2, name = newnam)
  newedges = c(rbind(parents, newnam, newnam, admixnamesrep))
  deledges = paste(parents, admixnamesrep, sep='|')
  graph = igraph::add_edges(graph, newedges)
  graph = igraph::simplify(graph)
  graph = igraph::delete_edges(graph, deledges)
  if(max(degree(graph, mode='in')) > 2 || max(degree(graph, mode='out')) > 2) {
    stop('Desimplify failed')
  }
  if(convmat) graph = igraph::as_edgelist(graph)
  graph
}


X_to_H = function(graph) {

  crosses = names(which(degree(graph, mode = 'in') > 1 & degree(graph, mode = 'out') > 1))
  for(i in seq_along(crosses)) {
    nam = newnodenam(crosses[i], names(V(graph)))
    parents = names(neighbors(graph, crosses[i], mode = 'in'))
    graph %<>% igraph::add_vertices(1, name = nam) %>%
      add_edges(c(interleave(parents, rep(nam, length(parents))), nam, crosses[i])) %>%
      delete_edges(paste0(parents, '|', crosses[i]))
  }
  graph
}

H_to_X = function(graph) {
  # keeps child names

  adm = names(which(degree(graph, mode = 'in') > 1 & degree(graph, mode = 'out') == 1))
  children = names(neighbors(graph, adm, mode = 'out'))
  ind = which(degree(graph, children, mode = 'out') == 2)
  for(i in seq_along(ind)) {
    parents = names(neighbors(graph, adm[i], mode = 'in'))
    graph %<>% delete_vertices(adm[i]) %>%
      add_edges(interleave(parents, rep(children[i], length(parents))))
  }
  graph
}

merge_nested_admix = function(graph) {

  graph %<>% X_to_H
  leaves = get_leafnames(graph)

  while(TRUE) {
    adm = names(which(degree(graph, mode = 'in') > 1)) %>% setdiff(leaves)
    admchildren = map_chr(adm, ~names(neighbors(graph, ., mode = 'out')))
    both = which(admchildren %in% names(which(degree(graph, admchildren, mode = 'in') > 1)))
    if(length(both) == 0) break
    parent = adm[both[1]]
    child = admchildren[both[1]]
    grandparents = names(neighbors(graph, parent, mode = 'in'))
    graph %<>%
      delete_vertices(parent) %>%
      add_edges(interleave(grandparents, rep(child, length(grandparents))))
  }
  graph #%<>% H_to_X
}


split_graph = function(graph) {
  # removes admixture nodes from an admixturegraph
  # returns a tree and a list of edges that can be used to reconstruct the original admixturegraph
  if(!'igraph' %in% class(graph)) {
    edges = graph
    graph = edges %>% edges_to_igraph
    edges %<>% filter(type == 'admix') %>%
      left_join(edges %>% transmute(fromfrom = from, from = to), by = 'from')
    pickedges = TRUE
  } else pickedges = FALSE
  graph %<>% simplify_graph()
  nodes = frommap = tomap = names(V(graph))
  names(frommap) = names(tomap) = nodes
  fromnodes = tonodes = c()
  deleted = kept = matrix(NA, 0, 2)
  if(!is_valid(graph)) stop('not valid!')
  root = get_rootname(graph)
  leaves = get_leafnames(graph)
  repeat({
    admix = setdiff(names(which(degree(graph, mode='in') == 2)), root)
    if(length(admix) == 0) break
    adm = admix[1]
    parents = sample(setdiff(sample(names(neighbors(graph, adm, mode='in'))), root))
    if(pickedges) {
      p1 = edges %>% filter(to == adm, from %in% parents) %>% slice_min(weight, with_ties = FALSE) %>% pull(from)
      if(length(p1) == 0) p1 = edges %>% filter(to == adm, fromfrom %in% parents) %>% slice_max(weight, with_ties = FALSE) %>% pull(fromfrom)
    } else {
      p1 = parents[1]
    }
    fromnode0 = names(neighbors(graph, p1, mode = 'in'))
    i = 0
    while(length(fromnode0) > 1) {
      i = i+1
      adm = p1
      parents = sample(setdiff(names(neighbors(graph, adm, mode='in')), root))
      if(pickedges) {
        p1 = edges %>% filter(to == adm, from %in% parents) %>% slice_min(weight, with_ties = FALSE) %>% pull(from)
        if(length(p1) == 0) p1 = edges %>% filter(to == adm, fromfrom %in% parents) %>% slice_max(weight, with_ties = FALSE) %>% pull(fromfrom)
      } else {
        p1 = parents[1]
      }
      if(length(parents) == 0 || !p1 %in% names(V(graph)) || root %in% parents) stop('split graph error')
      fromnode0 = names(neighbors(graph, p1, mode = 'in'))
      if(i > 100) stop('split graph error 2')
    }
    # stopifnot(!any(c(parents[1], admix[1]) %in% unlist(admixedges)))
    # too strict; probably won't be able to always reintroduce admixedges at appropriate locations after SPR

    fromnode = setdiff(names(neighbors(graph, p1, mode = 'out')), adm)
    tonode0 = setdiff(names(neighbors(graph, adm, mode = 'in')), p1)
    tonode = names(neighbors(graph, adm, mode = 'out'))
    fromnodes %<>% c(fromnode)
    tonodes %<>% c(tonode)

    graphnew = igraph::delete_vertices(graph, adm)
    graphnew = igraph::add_edges(graphnew, c(tonode0, tonode))
    if(tonode %in% admix) frommap[tonode0] = adm
    #if(delete_nodes) {
      graphnew = igraph::delete_vertices(graphnew, p1)
      if(p1 %in% tonodes) tonodes[tonodes == p1] = tonode
      if(length(fromnode) == 0) browser()
      if(p1 %in% fromnodes) fromnodes[fromnodes == p1] = fromnode
      graphnew2 = igraph::add_edges(graphnew, c(fromnode0, fromnode)) %>% simplify_graph()
      if(!is_simplified(graphnew2)) browser()
      graphnew = graphnew2
    #}
    if(is_valid(graphnew) && numadmix(graphnew) < numadmix(graph)) graph = graphnew
    else stop('Graph invalid')
    deleted = rbind(deleted, c(frommap[p1], adm))
    if(frommap[p1] != p1) {
      deleted = rbind(deleted, c(p1, frommap[p1]))
    }
    kept = rbind(kept, c(setdiff(parents, p1), adm))
  })
  tree = graph
  if(max(degree(tree, mode = 'in')) > 1) stop('Failed to make tree!')
  namedList(tree, fromnodes, tonodes, deleted, kept)
}

shorten_admixed_leaves = function(graph) {

  # keep terminal branch only for leaves which are non-admixed

  leaves = get_leafnames(graph)
  leaves %<>% intersect(names(which(degree(graph, leaves, mode = 'in') == 1)))
  parents = map_chr(leaves, ~names(neighbors(graph, ., mode = 'in')))
  ind = which(degree(graph, parents, mode = 'in') == 2)
  graph %>%
    delete_vertices(leaves[ind]) %>%
    set_vertex_attr('name', parents[ind], leaves[ind])
}

restore_admixed_leaves = function(graph) {

  # keep terminal branch only for leaves which are non-admixed

  leaves = get_leafnames(graph)
  adm = names(which(igraph::degree(graph, mode = 'in') > 1)) %>%
    intersect(leaves)
  if(length(adm) == 0) return(graph)
  nam = paste0('adm_before_', adm)

  graph %>%
    igraph::set_vertex_attr('name', adm, nam) %>%
    igraph::add_vertices(length(adm), attr = list(name = adm)) %>%
    igraph::add_edges(interleave(nam, adm))
}


newnodenam = function(newnam, current) {
  # this function takes a vector of proposed new node names (newnam), checks if they already exist,
  # and if so, returns unique newnams with newnam as prefix
  for(i in seq_along(newnam)) {
    mod = newnam[i]
    while(mod %in% current) {
      mod %<>% paste0(sample(letters, 1))
    }
    newnam[i] = mod
    current %<>% c(mod)
  }
  newnam
}


#' Generate a random binary graph
#' @export
#' @param n The number of terminal nodes, or a vector of population labels.
#' @param start Prefix.
#' @param end Postfix.
#' @param outpop Outgroup (if \code{n} is a vector of labels).
#' @return Tree in newick format.
#' @examples
#' random_newick(5)
#' random_newick(c('a', 'b', 'c', 'd')) # toplogy random, but pop order fixed
#' random_newick(sample(c('a', 'b', 'c', 'd'))) # toplogy and pop order random
random_newick = function(n, start = '', end = '', outpop = NULL) {
  # recursive function which returns topology of a random, binary tree in newick format
  # redirects to 'random_newick_named' when length(n) > 1
  if(length(n) > 1) {
    if(!is.null(outpop)) {
      start = paste0(start, '(', outpop, ',')
      end = paste0(')', end)
      n = setdiff(n, outpop)
    }
    return(random_newick_named(n, start, end))
  }
  if(n == 1) return('')
  n1 = sample(n-1,1)
  n2 = n-n1
  return(paste0(start, '(',random_newick(n1, ''),',',random_newick(n2, ''),')', end))
}

random_newick_named = function(names, start = '', end = '') {
  # recursive function which returns a labelled random, binary tree in newick format with named leaves in order of input
  n = length(names)
  if(n == 1) return(names)
  n1 = sample(n-1,1)
  return(paste0(start, '(',random_newick_named(names[1:n1]),',',random_newick_named(names[(n1+1):n]),')', end))
}

#' Turn a newick format tree to a matrix of edges
#' @export
#' @param newick Tree in newick format.
#' @param node Root label of the tree.
#' @param edgemat Used for recursive function calls.
#' @return Tree as two column matrix of edges (adjacency list)
#' @examples
#' newick = random_newick(c('a', 'b', 'c', 'd'))
#' newick
#' newick_to_edges(newick)
newick_to_edges = function(newick, node = 'R', edgemat = matrix(NA,0,2)) {
  # turns binary tree in newick format into matrix of edges (adjacency list)

  newick = gsub('^\\(', '', gsub('\\)$', '', gsub(';$', '', newick)))
  opencount = 0
  for(i in 1:nchar(newick)) {
    char = substr(newick, i,i)
    if(char == '(') opencount = opencount+1
    if(char == ')') opencount = opencount-1
    if(char == ',' && opencount == 0) break
  }
  left = substr(newick, 1, i-1)
  right = substr(newick, i+1, nchar(newick))
  stopifnot(str_count(newick, ',') >= 1)
  if(str_count(left, ',') == 0) {
    nodel = left
    edgesleft = matrix(NA, 0, 2)
  } else {
    nodel = paste0(node, 'l')
    edgesleft = newick_to_edges(left, nodel)
  }
  if(str_count(right, ',') == 0) {
    noder = right
    edgesright = matrix(NA, 0, 2)
  } else {
    noder = paste0(node, 'r')
    edgesright = newick_to_edges(right, noder)
  }
  rbind(c(node, nodel), edgesleft, c(node, noder), edgesright, edgemat)
}


#' Insert admixture edges into graph
#'
#' @param graph An admixture graph
#' @param fromnodes List of nodes. New edges will originate above these nodes.
#' @param tonodes List of nodes. New edges will end above these nodes.
#' @param substitute_missing If `TRUE`, an attempt will be made to insert random other edges
#'   if some of the provided edges could not be inserted.
#' @param allow_below_admix Allow insertion of edges which begin or end directly underneath an admixture node
#' @return Adxmiture graph with inserted edges
insert_admix_old = function(graph, fromnodes, tonodes, substitute_missing = FALSE,
                               allow_below_admix = FALSE) {
  # inserts edges fromnodes -> tonodes into graph
  # inverse of 'split_graph'
  # stopifnot(all(c(sapply(admixedges, `[`, c(2, 4))) %in% names(V(graph))))
  # some nodes will get lost when splitting; insert random amix edges instead

  stopifnot(length(fromnodes) == length(tonodes))
  leaves = sort(get_leafnames(graph))
  desimplify = !is_simplified(graph)
  if(desimplify) graph %<>% simplify_graph

  miss = 0
  maxprev = max(degree(graph))
  for(i in rev(seq_len(length(fromnodes)))) {
    fromnode = fromnodes[i]
    tonode = tonodes[i]
    if(!fromnode %in% names(V(graph)) ||
       !tonode %in% names(V(graph)) ||
       is.finite(igraph::distances(graph, tonode, fromnode, mode = 'out'))) {
      miss = miss+1
      next
    }
    toedge2parent = neighbors(graph, tonode, mode='in')
    toedge2sib = setdiff(names(neighbors(graph, toedge2parent, mode='out')), tonode)
    if(!allow_below_admix) {
      if(length(toedge2sib) == 0 ||
         degree(graph, fromnode, mode='in') > 1 ||
         degree(graph, toedge2sib, mode='in') > 1) {
        miss = miss+1
        next
      }
    }
    fromnode_parent = names(neighbors(graph, fromnode, mode='in'))
    tonode_parent = names(neighbors(graph, tonode, mode='in'))
    if(length(fromnode_parent) != 1 || length(tonode_parent) != 1) {
      miss = miss+1
      next
    }
    graph = igraph::delete_edges(graph, c(paste(fromnode_parent, fromnode, sep='|'),
                                          paste(tonode_parent, tonode, sep='|')))
    newnam = newnodenam(c(fromnode_parent, 'admix'), names(V(graph)))
    new1 = newnam[1]
    new2 = newnam[2]
    graph = igraph::add_vertices(graph, 2, name = newnam)
    newedges = c(fromnode_parent, new1, new1, fromnode, tonode_parent, new2, new2, tonode, new1, new2)
    graph = igraph::add_edges(graph, newedges)
    stopifnot(max(degree(graph)) <= maxprev)
  }
  if (miss > 0) {
    if(substitute_missing) graph = insert_admix_random(graph, miss)
    else warning('did not insert or substitute admixedge')
  }
  if(desimplify) graph %<>% simplify_graph %>% desimplify_graph
  if(!isTRUE(all.equal(leaves, sort(get_leafnames(graph))))) stop('insert_admix_old failed!')
  graph
}


insert_admix_random = function(graph, nadmix) {
  # graph should be simplified
  for(i in seq_len(nadmix)) {
    root = get_root(graph)
    firstgen = neighbors(graph, root, mode='out')
    admix = V(graph)[(degree(graph, mode='in') > 1)]
    admixchildren = do.call(c, igraph::ego(graph, 1, admix, mode='out'))
    admixparents = do.call(c, igraph::ego(graph, 1, admix, mode='in'))
    admixsiblings = do.call(c, igraph::ego(graph, 1, admixparents, mode='out'))
    exclude = c(admix, firstgen, root)
    if(!is.null(admixchildren)) exclude = c(exclude, admixchildren)
    cand_from = sample(igraph::difference(V(graph), exclude))
    for(fromnode in names(cand_from)) {
      upstream = subcomponent(graph, fromnode, mode='in')
      downstream1 = neighbors(graph, fromnode, mode='out')
      siblings = neighbors(graph, upstream[2], mode='out') # upstream[2] should be parent of fromnode
      uncles = neighbors(graph, neighbors(graph, upstream[2], mode='in'), mode='out')
      exclude = c(upstream, downstream1, siblings, uncles)
      if(!is.null(admixsiblings)) exclude = c(exclude, admixsiblings)
      cand_to = igraph::difference(cand_from, exclude)
      if(length(cand_to) > 0) break
    }
    if(length(cand_from) == 0 | length(cand_to) == 0) {
      warning(paste0('could only insert ', i-1, ' admixture nodes'))
      break
    }
    tonode = sample(names(cand_to), 1)
    stopifnot(all(c(fromnode, tonode) %in% names(V(graph))))
    graphnew = insert_admix_old(graph, fromnode, tonode, substitute_missing = FALSE)
    stopifnot(numadmix(graphnew) > numadmix(graph))
    stopifnot(igraph::is_simple(graphnew))
    stopifnot(igraph::is_dag(graphnew))
    graph = graphnew
  }
  graph
}


#' Insert a single edge into graph
#'
#' @export
#' @param graph An admixture graph
#' @param source_from Parent node of the source edge
#' @param source_to Child node of the source edge
#' @param dest_from Parent node of the destination edge
#' @param dest_to Child node of the destination edge
#' @param substitute Should another edge be inserted, if the one specified doesn't work?
#' @return Adxmiture graph with inserted edge
#' @seealso \code{\link{delete_admix}}, \code{\link{insert_admix_n}}
insert_admix = function(graph, source_from = NULL, source_to = NULL, dest_from = NULL, dest_to = NULL,
                        substitute = FALSE, fix_outgroup = TRUE) {
  # assumes graph is simplified

  if(length(source_to) > 1) {
    n = length(source_to)
    f = function(x, y) insert_admix(x, source_from[y], source_to[y], dest_from[y], dest_to[y], substitute = substitute)
    graph = source_to %>% length %>% seq_len %>% reduce(f, .init = graph)
    return(graph)
  }

  leaves = sort(get_leafnames(graph))
  nodes = names(V(graph))
  if(is.null(source_to) || is.null(dest_to) || length(setdiff(c(source_to, dest_to), nodes)) > 0 && substitute) {
    e = graph %>% find_newedges(fix_outgroup = fix_outgroup) %>% slice_sample(n=1)
    source_from = e$source_from
    dest_from = e$dest_from
    source_to = e$source_to
    dest_to = e$dest_to
  } else if(is.null(source_from) || is.null(dest_from) || length(setdiff(c(source_from, dest_from), nodes)) > 0 && substitute) {
    getfrom = function(x) sample(names(neighbors(graph, x, mode = 'in')))[1]
    source_from = getfrom(source_to)
    dest_from = getfrom(dest_to)
  }
  nam = newnodenam(c(source_from, 'admix'), names(V(graph)))
  newgraph = graph %>% add_vertices(2, attr = list(name = nam)) %>%
    add_edges(c(source_from, nam[1], nam[1], source_to, dest_from, nam[2], nam[2], dest_to, nam[1], nam[2])) %>%
    delete_edges(paste(c(source_from, dest_from), c(source_to, dest_to), sep = '|'))
  leaves2 = sort(get_leafnames(newgraph))
  if(!is_valid(newgraph) || !isTRUE(all.equal(leaves2, leaves))) {
    if(substitute) graph %<>% insert_admix(substitute = FALSE)
    else browser()
    #else stop("Inserting edge failed!")
  } else graph = newgraph
  graph
}


#' Insert admixture edges into graph
#'
#' @export
#' @param graph An admixture graph
#' @param from List of nodes. New edges will originate above these nodes.
#' @param to List of nodes. New edges will end above these nodes.
#' @param substitute Should another edge be inserted, if the one specified doesn't work?
#' @return Admixture graph with inserted edges
#' @seealso \code{\link{insert_admix}} \code{\link{delete_admix}}
insert_admix_n = function(graph, n = 1, fix_outgroup = TRUE) {

  # if(all(map_lgl(list(source_from, source_to, dest_from, dest_to), is.null))) {
  #   f = function(x, y) insert_admix(x)
  # } else {
  #   if(is.null(source_from) && is.null(dest_from)) {
  #     # implement this so that insert_admix_old can be replaced
  #     getfrom = ~sample(names(neighbors(graph, ., mode = 'in')))[1]
  #     source_from = map_chr(source_to, getfrom)
  #     dest_from = map_chr(dest_to, getfrom)
  #   }
  #   n = length(source_from)
  #   f = function(x, y) insert_admix(x, source_from[y], source_to[y], dest_from[y], dest_to[y])
  # }
  f = function(x, y) insert_admix(x, substitute = TRUE, fix_outgroup = fix_outgroup)
  seq_len(n) %>% reduce(f, .init = graph)
}

subtree_prune_and_regraft = function(graph, only_leaves = FALSE, fix_outgroup = TRUE) {
  # cuts of a parts of the tree and attaches it at a random location
  # root -> outgroup stays fixed
  root = get_root(graph)
  stopifnot(degree(graph, root, mode='in') == 0)
  stopifnot(max(degree(graph, V(graph), mode='in')) == 1)

  if(fix_outgroup) firstgen = neighbors(graph, root, mode='out')
  else firstgen = c()
  i = 0
  repeat({
    i = i+1
    excl = c(firstgen, root)
    if(only_leaves) excl = c(excl, V(graph)[degree(graph, mode='out') > 0])
    cutnode = sample(igraph::difference(V(graph), excl), 1)
    cutnodes = subcomponent(graph, cutnode, mode='out')
    cutparent = neighbors(graph, cutnode, mode='in')
    cutgrandparent = neighbors(graph, cutparent, mode='in')
    cutsibling = igraph::difference(neighbors(graph, cutparent, mode='out'), cutnode)
    hostnodes = igraph::difference(V(graph), c(root, cutnodes, cutparent, cutsibling, firstgen))
    if(length(hostnodes) > 0 && length(cutsibling) != 0) break
    if(i > 100) stop('spr error')
  })

  hostnode = sample(names(hostnodes), 1)
  hostparent = names(neighbors(graph, hostnode, mode='in'))

  newnam = paste(hostnode, sample(letters, 1), sep='_')
  while(newnam %in% names(V(graph))) newnam = paste0(newnam, sample(letters, 1))

  tryCatch({
  graph %<>% igraph::add_vertices(1, name=newnam) %>%
    igraph::add_edges(c(names(cutgrandparent), names(cutsibling),
                hostparent, newnam,
                newnam, hostnode,
                newnam, names(cutnode))) %>%
    igraph::delete_vertices(names(cutparent)) %>%
    igraph::delete_edges(paste(hostparent, hostnode, sep='|'))
  }, error = function(e) stop('spr error'))
  stopifnot(igraph::is_simple(graph))
  stopifnot(igraph::is_dag(graph))
  graph
}


admixturegraph_prune_and_regraft = function(graph, only_leaves = FALSE, fix_outgroup = TRUE) {
  # 1. remove admixture edges (one randomly selected from each admixture node)
  # 2. runs subtree_prune_and_regraft on resulting tree
  # 3. add admixture edges back on
  if(numadmix(graph) == 0) return(subtree_prune_and_regraft(graph, only_leaves = only_leaves, fix_outgroup = fix_outgroup))
  o = graph
  desimplify = !is_simplified(graph)
  if(desimplify) graph %<>% simplify_graph
  if(!is_valid(graph)) stop('apr error')
  spl = split_graph(graph)
  graph = subtree_prune_and_regraft(spl$tree, only_leaves = only_leaves, fix_outgroup = fix_outgroup)
  #graph = insert_admix_old(graph, spl$fromnodes, spl$tonodes, substitute_missing = TRUE)
  graph = insert_admix(graph, source_to = spl$fromnodes, dest_to = spl$tonodes, substitute = TRUE)
  stopifnot(is_valid(graph))
  if(desimplify) graph = desimplify_graph(graph)
  graph
}

#' Modify a graph by regrafting a leaf
#'
#' @export
#' @param graph An admixture graph
#' @return A new admixture graph
spr_leaves = function(graph, fix_outgroup = TRUE)
  admixturegraph_prune_and_regraft(graph, only_leaves = TRUE, fix_outgroup = fix_outgroup)

#' Modify a graph by regrafting a subcomponent
#'
#' @export
#' @param graph An admixture graph
#' @return A new admixture graph
spr_all = function(graph, fix_outgroup = TRUE)
  admixturegraph_prune_and_regraft(graph, only_leaves = FALSE, fix_outgroup = fix_outgroup)

#' Modify a graph by moving an admixture edge
#'
#' @export
#' @param graph An admixture graph
#' @param fix_outgroup Keep outgroup in place
#' @return A new admixture graph
move_admixedge_once = function(graph, fix_outgroup = TRUE) {
  # selects random admixture edge, and moves it to next closest possible spot
  # if not possible, select other admix node
  # if none possible, print warning
  desimplify = !is_simplified(graph)
  if(desimplify) graph %<>% simplify_graph
  admix = sample(names(V(graph)[(degree(graph, mode='in') > 1)]))
  nadmix = length(admix)
  root = get_rootname(graph)
  if(fix_outgroup) firstgen = names(neighbors(graph, root, mode='out'))
  else firstgen = ''
  for(i in seq_len(nadmix)) {
    parents = sample(names(neighbors(graph, admix[i], mode='in')))
    for(j in 1:2) {
      sibling = setdiff(names(neighbors(graph, parents[j], mode='out')), admix[i])
      grandparent = names(neighbors(graph, parents[j], mode='in'))
      if(length(sibling) == 0 || length(grandparent) == 0) next
      for(n in names(subcomponent(graph, parents[j]))) {
        # subcomponent will return nodes ordered by distance to n
        if(!n %in% root &&
           !n %in% firstgen &&
           !n %in% parents &&
           !n %in% grandparent &&
           !n %in% admix &&
           !n %in% c(names(neighbors(graph, parents[1], mode='out')),
                     names(neighbors(graph, parents[2], mode='out'))) &
           !n %in% names(subcomponent(graph, admix[i], mode='out'))) {
          newgrandparent = names(neighbors(graph, n, mode='in'))
          if(length(c(grandparent, sibling, newgrandparent, parents[j], parents[j], n)) %% 2 != 0) stop('move admix error')
          graph = igraph::add_edges(graph, c(grandparent, sibling, newgrandparent, parents[j], parents[j], n))
          graph = igraph::delete_edges(graph, c(paste(newgrandparent, n, sep='|'),
                                              paste(grandparent, parents[j], sep='|'),
                                              paste(parents[j], sibling, sep='|')))
          graph %<>% simplify_graph
          stopifnot(igraph::is_dag(graph))
          if(desimplify) graph = desimplify_graph(graph)
          return(graph)
        }
      }
    }
  }
  #if(nadmix > 0) warning('no suitable attachment points found for admixture edge')
  if(desimplify) graph = desimplify_graph(graph)
  return(graph)
}

#' Modify a graph by permuting leaf nodes
#'
#' @export
#' @param graph An admixture graph
#' @param fix_outgroup Keep outgroup in place
#' @return A new admixture graph
permute_leaves = function(graph, fix_outgroup = TRUE) {

  leaves = V(graph)[degree(graph, v = V(graph), mode = c('out')) == 0]
  if(fix_outgroup) leaves = leaves[-1]
  nam = names(leaves)
  igraph::set_vertex_attr(graph, 'name', leaves, sample(nam))
}

#' Modify a graph by swapping two leaf nodes
#'
#' @export
#' @param graph An admixture graph
#' @param fix_outgroup Keep outgroup in place
#' @return A new admixture graph
swap_leaves = function(graph, fix_outgroup = TRUE) {

  leaves = V(graph)[degree(graph, v = V(graph), mode = c('out')) == 0]
  if(fix_outgroup) leaves = leaves[-1]
  leaves = sample(leaves)[1:2]
  nam = names(leaves)
  igraph::set_vertex_attr(graph, 'name', leaves, sample(nam))
}


swap_admix = function(graph) {

  admixedges = graph %>% find_admixedges %>% sample_frac(1)
  sw = admixedges %>% filter(!duplicated(to)) %>% slice(1:2) %>% mutate(e = paste(from, to, sep='|'))
  graph %>%
    igraph::delete_edges(sw$e) %>%
    igraph::add_edges(c(sw$from[1], sw$to[2], sw$from[2], sw$to[1]))
}

#' Modify a graph flipping the direction of an admixture edge
#'
#' @export
#' @param graph An admixture graph
#' @param fix_outgroup Keep outgroup in place (has no effect here)
#' @return A new admixture graph
flipadmix_random = function(graph, fix_outgroup = TRUE) {

  desimplify = !is_simplified(graph)
  if(desimplify) graph %<>% simplify_graph
  admixedges = graph %>% find_admixedges %>% sample_frac(1)
  el = igraph::as_edgelist(graph)
  out = graph
  for(i in seq_len(nrow(admixedges))) {
    newg = admixedges %>% slice(i) %>% mutate(graph = map2(from, to, ~flipadmix2(el, .x, .y))) %$% graph[[1]]
    if(!is.null(newg)) {
      out = newg
      break
    }
  }
  if(desimplify) out %<>% desimplify_graph
  out
}

#' Modify a graph by applying n mutation functions
#'
#' @export
#' @param graph An admixture graph
#' @param n Number of functions to apply
#' @param funs List of function from which to choose
#' @param fix_outgroup Keep outgroup in place
#' @return A new admixture graph
mutate_n = function(graph, n = 2, funs = list(spr_all, spr_leaves, swap_leaves, move_admixedge_once,
                                              flipadmix_random, mutate_n), fix_outgroup = TRUE) {

  funsn = sample(funs, n, replace = TRUE) %>%
    map(~function(x) .x(x, fix_outgroup=fix_outgroup))
  purrr::compose(!!!funsn)(graph)
}


get_mutfuns = function(mutfuns, probs, fix_outgroup = TRUE) {
  # return a list of list of graph mutation functions
  # outer list is for each generation, inner list is for each graph, and is named
  # probs is a matrix numgen x numgraphs with probabilities

  if(is.null(names(mutfuns))) names(mutfuns) = paste0('fun', seq_along(mutfuns))
  if(length(unique(names(mutfuns))) < length(mutfuns)) stop("Names in 'mutfuns' are not unique!")
  map(seq_len(nrow(probs)), ~{
    nam = sample(names(mutfuns), ncol(probs), replace = TRUE, prob = probs[.,])
    map(mutfuns, ~function(graph) .(graph, fix_outgroup = fix_outgroup)) %>%
      set_names(nam)
  })
}


add_generation = function(models, numgraphs, numsel, qpgfun, mutfuns, opt_worst_residual = FALSE, parallel = TRUE, verbose = TRUE) {

  space = paste0(paste(rep(' ', 50), collapse=''), '\r')
  if(verbose) alert_info(paste0('Selecting winners...', space))
  lastgen = max(models$generation)
  oldmodels = models %>% filter(generation == lastgen, !is.na(score))
  numsel = min(nrow(oldmodels), numsel)
  numeach = ceiling(numgraphs/numsel)
  winners = oldmodels %>% mutate(prob = score_to_prob(score)) %>% sample_n(numsel-1, weight=prob) %>% select(-prob)
  optvar = if(opt_worst_residual) 'worst_residual' else 'score'
  winners %<>% bind_rows(oldmodels %>% slice_min(.data[[optvar]], with_ties = FALSE))
  sq = (numsel+1):numgraphs
  newmodels = tibble(generation = lastgen+1, index = sq,
                     igraph = rep(winners$igraph, numeach)[sq],
                     oldscore = rep(winners$score, numeach)[sq],
                     oldindex = rep(winners$index, numeach)[sq])
  mutations = sample(mutfuns, numgraphs-numsel, replace = TRUE)
  if(parallel) {
    map = function(...) furrr::future_map(..., .options = furrr::furrr_options(seed = TRUE))
    imap = function(...) furrr::future_imap(..., .options = furrr::furrr_options(seed = TRUE))
  }
  if(verbose) alert_info(paste0('Generating new graphs...', space))
  newmodels %<>% mutate(mutation = names(mutations),
                        igraph = imap(igraph, ~tryCatch(exec(mutations[[.y]], .x), error = function(e) .x)))
  if(verbose) alert_info(paste0('Evaluating graphs...', space))
  newmodels %<>% mutate(out = map(igraph, qpgfun))
  if(verbose) alert_info(paste0('Attaching to previous generations...', space))
  winners %>%
    mutate(generation = lastgen+1, index = 1:n()) %>%
    bind_rows(newmodels %>% unnest_wider(out))
}



score_to_prob = function(score) {
  logscore = log10(score)
  sc = 1-logscore/max(logscore)+1e-10
  sc = sc^20
  sc = replace_na(sc/sum(sc), 0)
  sc
}


evolve_topology = function(init, numgraphs, numgen, numsel, qpgfun, mutlist, repnum,
                           opt_worst_residual = FALSE, parallel = TRUE,
                           keep = 'all', store_intermediate = NULL, stop_after = NULL, verbose = TRUE) {
  out = init
  stop_at = Sys.time() + stop_after
  for(i in seq_len(numgen)) {
    if(!is.null(stop_after) && Sys.time() > stop_at) break
    newmodels = add_generation(out, numgraphs, numsel, qpgfun, mutlist[[i]], opt_worst_residual = opt_worst_residual,
                               parallel = parallel, verbose = verbose)
    if(!is.null(store_intermediate)) {
      fl = paste0(store_intermediate, '_rep', repnum, '_gen', i, '.rds')
      saveRDS(newmodels, fl)
    }
    optvar = if(opt_worst_residual) 'worst_residual' else 'score'
    if(keep == 'all') out %<>% bind_rows(newmodels)
    else if(keep == 'best') out %<>% group_by(generation) %>% slice_min(.data[[optvar]], with_ties = FALSE) %>%
      ungroup %>% bind_rows(newmodels)
    else out = newmodels
    if(verbose) {
      #best = newmodels %>% filter(index > numsel) %>% top_n(numsel, -jitter(score, amount = 1e-9)) %$% score %>% sort
      best = newmodels %>% slice_min(.data[[optvar]], n = numsel, with_ties = FALSE) %>% pull(.data[[optvar]]) %>% sort
      alert_success(paste0('Generation ', i, '  Best ', optvar,'s: ', paste(round(best, 3), collapse=', '),
                    paste(rep(' ', 30), collapse=''), '\n'))
    }
  }
  if(verbose) cat('\n')
  if(keep == 'best') out %<>% group_by(generation) %>% slice_min(.data[[optvar]], with_ties = FALSE) %>% ungroup
  out
}


optimize_admixturegraph_single = function(pops, precomp, mutlist, repnum, numgraphs = 50, numgen = 5,
                                          numsel = 5, numadmix = 0, numstart = 1, outpop = NA, initgraphs = NULL,
                                          opt_worst_residual = FALSE,
                                          parallel = TRUE, stop_after = NULL, store_intermediate = NULL,
                                          keep = 'all', verbose = TRUE, ...) {

  kp = c('edges', 'score', 'f3')
  if(opt_worst_residual) {
    kp = c(kp, 'worst_residual')
    qpgfun = function(graph) qpgraph(data = precomp, graph = graph,
                                     numstart = numstart, return_fstats = opt_worst_residual, verbose = FALSE, ...)[kp]
  } else {
    # qpgfun = function(graph) qpgraph(data = NULL, graph = graph, f3precomp = precomp,
    #                                  numstart = numstart, return_fstats = opt_worst_residual, verbose = FALSE, ...)[kp]
    qpgfun = function(graph) qpgraph(data = precomp, graph = graph,
                                     numstart = numstart, return_fstats = opt_worst_residual, verbose = FALSE, ...)[kp]
  }

  # qpgfun = possibly(qpgfun, otherwise = NULL)
  space = paste0(paste(rep(' ', 50), collapse=''), '\r')
  if(verbose) alert_info(paste0('Generate new graphs...', space))
  if(is.null(initgraphs)) initgraphs = replicate(numgraphs, random_admixturegraph(pops, numadmix, outpop = outpop),
                                                simplify = FALSE)
  else initgraphs = initgraphs[round(seq(1, length(initgraphs), numgraphs))]
  if(verbose) alert_info(paste0('Evaluate graphs...', space))
  if(parallel) map = function(...) furrr::future_map(..., .options = furrr::furrr_options(seed = TRUE))
  init = tibble(generation=0, index = seq_len(numgraphs),
                igraph = initgraphs, mutation = 'random_admixturegraph') %>%
    mutate(out = map(igraph, qpgfun), isn = map_lgl(out, is.null))
  if(all(init$isn)) stop('All NULL!')

  init %<>% select(-isn) %>% unnest_wider(out) %>% mutate(oldscore = score, oldindex = index)
  if(verbose) {
    optvar = if(opt_worst_residual) 'worst_residual' else 'score'
    best = init %>% filter(!is.na(score)) %>% slice_min(.data[[optvar]], n = min(numsel, nrow(.data)), with_ties = FALSE) %>% pull(.data[[optvar]])
    alert_success(paste0('Generation 0  Best ', optvar, 's: ', paste(round(best), collapse=', '), space, '\n'))
  }

  evolve_topology(init, numgraphs, numgen, numsel, qpgfun, mutlist, repnum, opt_worst_residual = opt_worst_residual,
                  parallel = parallel,
                  keep = keep, store_intermediate = store_intermediate, stop_after = stop_after, verbose = verbose)
}



#' Find well fitting admixture graphs
#'
#' This function generates and evaluates admixture graphs in `numgen` iterations across `numrep` independent repeats
#' to find well fitting admixturegraphs. It uses the function \code{\link[furrr]{future_map}}
#' to parallelize across the independent repeats. The function \code{\link[future]{plan}} can be called
#' to specify the details of the parallelization. This can be used to parallelize across cores or across nodes on
#' a compute cluster. Setting `numadmix` to 0 will search for well fitting trees, which is much faster than searching
#' for admixture graphs with many admixture nodes.
#' @param data Input data in one of three forms:
#' \enumerate{
#' \item A 3d array of blocked f2 statistics, output of \code{\link{f2_from_precomp}} or \code{\link{f2_from_geno}}
#' \item A directory which contains pre-computed f2-statistics
#' \item The prefix of genotype files
#' }
#' @param pops Populations for which to fit admixture graphs (default all)
#' @param outpop An outgroup population which will split at the root from all other populations in all tested graphs. If one of the populations is know to be an outgroup, designating it as `outpop` will greatly reduce the search space compared to including it and not designating it as `outpop`.
#' @param numrep Number of independent repetitions (each repetition can be run in parallel)
#' @param numgraphs Number of graphs in each generation
#' @param numgen Number of generations
#' @param numsel Number of graphs which are selected in each generation. Should be less than `numgraphs`.
#' @param numadmix Number of admixture events within each graph
#' @param numstart Number of random initializations in each call to `qpgraph`. Defaults to 1, to speed up the graph optimization.
#' @param keep Which models should be returned. One of `all`, `best`, `last`
#' \itemize{
#' \item `all` (default): Return all evaluated graphs
#' \item `best`: Return only the best fitting graph from each repeat and each generation
#' \item `last`: Return all graphs from the last generation
#' }
#' @param initgraphs Optional graph or list of igraphs to start with. If `NULL`, optimization will start with random graphs.
#' @param mutfuns Functions used to modify graphs. Defaults to the following:
#' \itemize{
#' \item \code{\link{spr_leaves}}: Subtree prune and regraft leaves. Cuts a leaf node and attaches it
#' to a random other edge in the graph.
#' \item \code{\link{spr_all}}: Subtree prune and regraft. Cuts any edge and attaches the new orphan node
#' to a random other edge in the graph, keeping the number of admixture nodes constant.
#' \item \code{\link{swap_leaves}}: Swaps two leaf nodes.
#' \item \code{\link{move_admixedge_once}}: Moves an admixture edge to a nearby location.
#' \item \code{\link{flipadmix_random}}: Flips the direction of an admixture edge (if possible).
#' \item \code{\link{mutate_n}}: Apply `n` of the mutation functions in this list to a graph (defaults to 2).
#' }
#' See examples for how to make new mutation functions.
#' @param mutprobs Relative frequencies of each mutation function.
#' \itemize{
#' \item `NULL` (default) means each mutation function is picked with equal probability
#' \item A numeric vector of length equal to `mutfuns` defines the relative frequency of each mutation function
#' \item A matrix of dimensions `numgen` x `length(mutfuns)` defines the relative frequency of each mutation function in each generation
#' }
#' @param opt_worst_residual Optimize for lowest worst residual instead of best score. `FALSE` by default, because the likelihood score is generally a better indicator of the quality of the model fit. Optimizing for the lowest worst residual is also slower (because f4-statistics need to be computed).
#' @param store_intermediate Path and prefix of files for intermediate results to `.rds`. Can be useful if `find_graphs_old` doesn't finish sucessfully.
#' @param parallel Parallelize over repeats (if `numrep > 1`) or graphs (if `numrep == 1`) by replacing \code{\link[purrr]{map}} with \code{\link[furrr]{future_map}}. Will only be effective if \code{\link[future]{plan}} has been set.
#' @param stop_after Stop optimization after `stop_after` seconds (and after finishing the current generation).
#' @param verbose Print progress updates
#' @param ... Additional arguments passed to \code{\link{qpgraph}}
#' @return A nested data frame with one model per line
#' @seealso \code{\link{qpgraph}}
#' @examples
#' \dontrun{
#' find_graphs_old(example_f2_blocks, numrep = 200, numgraphs = 100,
#'             numgen = 20, numsel = 5, numadmix = 3)
#' }
#' \dontrun{
#' # Making new mutation functions by modifying or combining existing ones:
#' newfun1 = function(graph, ...) mutate_n(graph, 3, ...)
#' newfun2 = function(graph, ...) flipadmix_random(spr_leaves(graph, ...), ...)
#' find_graphs_old(f2_blocks, mutfuns = namedList(spr_leaves, newfun1, newfun2), mutprobs = c(0.2, 0.3, 0.5))
#' }
find_graphs_old = function(data, pops = NULL, outpop = NULL, numrep = 1, numgraphs = 50,
                       numgen = 5, numsel = 5, numadmix = 0, numstart = 1, keep = c('all', 'best', 'last'), initgraphs = NULL,
                       mutfuns = namedList(spr_leaves, spr_all, swap_leaves, move_admixedge_once, flipadmix_random, mutate_n),
                       mutprobs = NULL, opt_worst_residual = FALSE, store_intermediate = NULL, parallel = TRUE, stop_after = NULL, verbose = TRUE, ...) {

  keep = rlang::arg_match(keep)
  if(numsel >= numgraphs || numsel < 1) stop("'numsel' has to be smaller than 'numgraphs' and greater than 0!")
  if(!is.null(pops) && !is.null(initgraphs)) stop("You can't provide 'pops' and 'initgraphs' at the same time!")

  if(!is.null(initgraphs)) {
    if('data.frame' %in% class(initgraphs) || 'matrix' %in% class(initgraphs)) {
      initgraphs = graph_from_edgelist(as.matrix(initgraphs[,1:2])) %>% list
    } else if('character' %in% class(initgraphs)) {
      initgraphs = graph_from_edgelist(as.matrix(parse_qpgraph_graphfile(initgraphs)[,1:2])) %>% list
    } else if('igraph' %in% class(initgraphs)) {
      initgraphs %<>% list
    } else if('list' %in% class(initgraphs)) {
    } else stop("'initgraphs' should be a single graph or a list of 'igraph' objects")
  }

  if(is.null(pops)) {
    if(!is.null(initgraphs)) {
      pops = get_leafnames(initgraphs[[1]])
    } else if(is.array(data)) {
      pops = dimnames(data)[[1]]
    } else stop('Please provide population names!')
  }

  # if(!opt_worst_residual) precomp = qpgraph_precompute_f3(data, pops, f3basepop = outpop, ...)
  # else precomp = get_f2(data, pops, ...)
  precomp = get_f2(data, pops, ...)

  if(class(mutfuns[[1]]) == 'character') mutfuns %<>% rlang::set_names() %>% map(get)
  if(is.null(mutprobs)) {
    if(numadmix == 0 && is.null(initgraphs)) mutfuns[c('move_admixedge_once', 'flipadmix_random')] = NULL
    mutprobs = matrix(1, numgen, length(mutfuns)) %>% set_colnames(names(mutfuns))
  } else if(!is.matrix(mutprobs)) {
    if(length(mutprobs) != length(mutfuns)) stop("'mutfuns' and 'mutprobs' don't match")
    mutprobs = t(replicate(numgen, mutprobs)) %>% set_colnames(names(mutfuns))
  } else {
    stopifnot(nrow(mutprobs) == numgen)
    stopifnot(ncol(mutprobs) == length(mutfuns))
    if(!isTRUE(all.equal(sort(colnames(mutprobs)), sort(names(mutfuns))))) mutprobs %>% set_colnames(names(mutfuns))
  }
  mutlist = get_mutfuns(mutfuns, mutprobs, fix_outgroup = !is.null(outpop))

  oa = function(i) optimize_admixturegraph_single(pops, precomp, mutlist = mutlist, repnum = i,
                                                 numgen = numgen, numsel = numsel,
                                                 numgraphs = numgraphs, numadmix = numadmix, numstart = numstart,
                                                 outpop = outpop, initgraphs = initgraphs, opt_worst_residual = opt_worst_residual,
                                                 parallel = parallel && numrep == 1,
                                                 stop_after = stop_after, store_intermediate = store_intermediate,
                                                 keep = keep, verbose = verbose && numrep == 1, ...)
  if(parallel && numrep > 1) {
    res = furrr::future_map(as.list(seq_len(numrep)), oa)
  } else {
    res = list()
    for(i in seq_len(numrep)) {
      if(verbose && numrep > 1) alert_info(paste0('Repeat ', i, ' out of ', numrep, '...\n'))
      res[[i]] = oa(i)
    }
  }
  res = bind_rows(res, .id='run')
  if(nrow(res) > 0) res %<>% mutate(run = as.numeric(run))
  res
}


summarize_graph = function(graph, exclude_outgroup = TRUE) {

  leaves = V(graph)[degree(graph, v = V(graph), mode = c('out')) == 0]
  if(exclude_outgroup) leaves = igraph::difference(leaves, V(graph)[2])

  paths = all_simple_paths(graph, get_root(graph), leaves, mode = 'out') %>%
    enframe(name = 'pathnum', value='path') %>% mutate(name1 = map_chr(path, ~(tail(names(.), 1))))

  get_iap = function(x, y) suppressWarnings(names(tail(which(cumsum(x != y) == 0), 1)))

  paths2 = paths %>% rename(path2 = path, pathnum2 = pathnum, name2 = name1)
  paths3 = paths %>% rename(path3 = path, pathnum3 = pathnum, name3 = name1)

  tripletopo = expand_grid(paths, paths2, paths3) %>%
    filter(pathnum != pathnum2, pathnum != pathnum3, pathnum2 != pathnum3,
           name1 != name2, name1 != name3, name2 != name3) %>%
    mutate(iap12 = map2_chr(path, path2, get_iap),
           iap13 = map2_chr(path, path3, get_iap),
           iap23 = map2_chr(path2, path3, get_iap)) %>%
    mutate(topo = case_when(iap13 == iap23 ~ 0, iap12 == iap23 ~ 1, iap12 == iap13 ~ 2),
           top = case_when(topo == 0 ~ iap13, topo == 1 ~ iap12, topo == 2 ~ iap12)) %>%
    group_by(name1, name2, name3, top) %>%
    summarize(x12 = any(topo == 0) & !any(topo == 1),
              x21 = any(topo == 0) & any(topo == 1),
              x13 = any(topo == 1) & !any(topo == 0),
              x31 = any(topo == 1) & any(topo == 0),
              x23 = any(topo == 2) & !any(topo == 0),
              x32 = any(topo == 2) & any(topo == 0)) %>%
    group_by(name1, name2, name3) %>%
    #summarize(x13 = any(x13), x23 = any(x23), x31 = any(x31), x32 = any(x32), x12 = any(x12), x21 = any(x21),
    summarize(across(starts_with('x'), any),
              toposet = paste0(x12+0, x21+0, x13+0, x31+0, x23+0, x32+0, collapse='')) %>%
    ungroup %>%
    #mutate(tlr = !x13 & !x31 & (x23 | x32) & (x12 | x21))
    mutate(tlr = !x13 & (x23 | x32) & (x12 | x21))

  tripletopo
}


#' Summarize triples across graphs
#'
#' This function summarizes topologies of population triples across graphs.
#'
#' @param graphs A list of graphs
#' @return A data frame with one line for each population triple and columns summarizing the relationship of each triple across graphs.
#' \itemize{
#' \item `clade12`: Fraction of graphs in which `pop1` and `pop2` form a clade
#' \item `x12`: Fraction of graphs in which `pop1` admixes into `pop2` at the exclusion of `pop3`
#' \item `toptopo`: A binary string representation of the most common topology. Digits represent `x12`, `x21`, `x13`, `x31`, `x23`, `x32`
#' \item `toptopocnt`: The number of graphs in which `toptopo` was observed
#' \item `topos`: The counts of all topologies
#' }
summarize_triples = function(graphs) {
  # results is output from 'find_graphs'
  # takes at most one graph from each independent run

  tibble(graph = graphs) %>%
    rowwise %>%
    mutate(topo = list(summarize_graph(graph))) %>%
    unnest(topo) %>%
    group_by(name1, name2, name3, toposet) %>%
    mutate(cnt = n()) %>%
    group_by(name1, name2, name3) %>%
    mutate(clade12 = !(x13|x31|x23|x32),
           clade13 = !(x12|x21|x23|x32),
           clade23 = !(x12|x21|x13|x31)) %>%
    summarize(across(starts_with(c('x','clade')), mean),
              #clade = mean(substr(toposet, 1, 4) == '0000'),
              toptopo = toposet[cnt = max(cnt)][1],
              toptopocnt = max(cnt),
              topos = list(setNames(cnt, toposet))) %>%
    ungroup %>%
    rename(pop1 = name1, pop2 = name2, pop3 = name3)
}


#' Find identical graphs
#'
#' @param igraphlist A list with admixture graphs
#' @return An integer vector with isomorphism classes.
#' Graphs with the same number have identical topology (but may have different labels).
#' @export
#' @seealso \code{\link{isomorphism_classes2}}
isomorphism_classes = function(igraphlist) {
  # returns integer vector with the same length as 'igraphlist', which assigns each graph to a class
  # only considers topology

  numgraph = length(igraphlist)
  if(numgraph == 0) return(numeric())
  if(numgraph == 1) return(1)

  leaflist = map(igraphlist, ~names(get_leaves(.)))

  cmb = combn(numgraph, 2)
  iso = map2_lgl(cmb[1,], cmb[2,], ~igraph::isomorphic(igraphlist[[.x]], igraphlist[[.y]]))

  sets = rep(NA, numgraph)
  sets[1] = 1
  for(i in seq_len(length(iso))) {
    if(iso[i]) {
      sets[cmb[2, i]] = sets[cmb[1, i]]
    } else if(is.na(sets[cmb[2, i]])) {
      sets[cmb[2, i]] = max(sets, na.rm=T) + 1
    }
  }
  sets
}

#' Find identical graphs
#'
#' @param igraphlist A list with admixture graphs
#' @return An integer vector with isomorphism classes.
#' Graphs with the same number have identical topology and leaf labels (but may have different internal labels).
#' @export
#' @seealso \code{\link{isomorphism_classes}}
isomorphism_classes2 = function(igraphlist) {

  # considers topology and leaf labels
  # runs `simplify_graph` on all graphs before comparing them

  numgraph = length(igraphlist)
  if(numgraph == 0) return(numeric())
  if(numgraph == 1) return(1)

  hashes = map_chr(igraphlist, graph_hash)
  factor(hashes, levels = unique(hashes)) %>% as.numeric
}

#' Return all valid qpAdm models for an admixturegraph
#'
#' For large admixturegraph, there may be a large number of valid qpAdm models!
#'
#' @param graph An admixture graph as igraph object
#' @param add_outgroup Should the graph outgroup be added to the qpAdm right populations?
#' Technically this shouldn't be an informative outgroup for qpAdm.
#' @param nested Should a nested data frame be returned (`TRUE`), or should populations be concatenated
#' into strings (`FALSE`)?
#' @param abbr Maximum number of characters to print for each population. The default (-1) doesn't abbreviate the names.
#' @examples
#' \dontrun{
#' qpadm_models_old(igraph2, add_outgroup = TRUE)
#' qpadm_models_old(igraph2, add_outgroup = TRUE) %>% slice(1) %$% list(target, left, right)
#' }
qpadm_models_old = function(graph, add_outgroup=FALSE, nested = TRUE, abbr = -1) {
  # don't do this for large models

  outgroup = get_outpop(graph)
  nleaves = graph %>% get_leaves %>% length

  # all triples which could form target - left pop - right pop
  tlr = summarize_graph(graph) %>% filter(tlr)
  if(nrow(tlr) == 0) return(tibble())
  # table of all possible right pops for each combination of target and right pop
  rightpops = tlr %>% group_by(name1, name2) %>% summarize(right = list(name3)) %>% ungroup %>% unnest_longer(right)
  # table of all possible combinations of target and power set of left pops (restricted to nleaves/2
  # because we need more rightpops than leftpops)
  leftpops = tlr %>% group_by(name1) %>%
    summarize(left = list(power_set(unique(name2), nmax=min(length(unique(name2)), floor(nleaves/2))))) %>%
    ungroup %>% unnest_longer(left) %>% group_by(name1) %>% mutate(powerset = seq_len(n())) %>% ungroup %>%
    unnest_longer(left)
  # table of all possible combinations of target, power set of left pops, and right, where right is right
  # for every left pop of power set
  out = leftpops %>% group_by(name1, powerset) %>% mutate(numleft = n()) %>% filter(numleft > 1) %>%
    left_join(rightpops, by=c('name1', 'left'='name2')) %>% group_by(name1, powerset, right) %>%
    mutate(numoginset = n()) %>% group_by(name1, powerset) %>% filter(numoginset == numleft) %>%
    select(-numoginset) %>% ungroup %>% rename(target = name1)

  # group so that each qpadm model is one line
  out %<>% group_by(target, powerset) %>%
    summarize(left = list(sort(unique(left))), right = list(sort(unique(right)))) %>% ungroup %>% select(-powerset)

  # add outpop to right pops (even though it shouldn't be a right-group-outgroup in the qpAdm sense)
  if(add_outgroup) out %<>% mutate(right = modify(right, ~sort(c(., outgroup))))

  # only keep models where nright > nleft
  out %<>% filter(right %>% map_int(length) > left %>% map_int(length))

  # add hash to be able to join on qpadm models
  out %>%
    mutate(hash_tl = map2_chr(target, left, ~digest::digest(list(.x, .y)))) %>%
    mutate(hash_tlr = pmap_chr(list(target, left, right), ~digest::digest(list(..1, ..2, ..3))))

  if(abbr != -1) out %<>% mutate(target = str_sub(target, 1, abbr),
                                 left = map(left, ~str_sub(., 1, abbr)),
                                 right = map(right, ~str_sub(., 1, abbr)))
  if(!nested) out %<>% select(target, left, right) %>%
    mutate(left = map_chr(left, ~paste(., collapse=',')), right = map_chr(right, ~paste(., collapse=',')))

  out

}

identify_edge = function(graph, from, to) {
  # this function returns the original from node for given a desimplified graph, and an admixture edge a simplified graph
  nam = names(V(graph))
  if(!from %in% nam || !to %in% nam) return(from)
  shortest_paths(graph, from, to, mode = 'out')$vpath[[1]] %>% names %>% tail(2) %>% head(1)
}

#' Find all trees which are part of the admixture graph
#'
#' @export
#' @param graph Admixture graph in `igraph` format
#' @return A data frame with columns `name` and `graph`
#' @examples
#' \dontrun{
#' trees = graph_splittrees(example_igraph)
#' # now evaluate the trees
#' trees %>%
#'   rowwise %>%
#'   mutate(res = list(qpgraph(example_f2_blocks, graph))) %>%
#'   unnest_wider(res)
#' }
graph_splittrees = function(graph, return_admix = FALSE, simplify = TRUE) {
  # splits an admixture graph into trees
  if(!'igraph' %in% class(graph)) {
    graph %<>% graph_from_edgelist
  }
  edges = graph %>% simplify_graph %>% as_edgelist %>%
    as_tibble(.name_repair = ~c('from', 'to'))
  leaves = get_leafnames(graph)

  admixmat = edges %>% mutate(i = 1:n()) %>% group_by(to) %>% mutate(cnt = n(), j = 1:n()) %>% ungroup %>%
    filter(cnt == 2) %>% select(to, i, j) %>% spread(to, i) %>% select(-j) %>% as.matrix
  nadmix = ncol(admixmat)
  if(nadmix == 0) return(list(graph))
  #mutate(itree = map(itree, ~delete_vertices(., setdiff(get_leafnames(.), get_leafnames(tree)))))
  indices = expand.grid(replicate(ncol(admixmat), 1:2, simplify = FALSE)) %>% as.matrix
  am = map(1:nrow(indices), ~slice(edges, -admixmat[cbind(indices[.,], 1:nadmix)])) %>% map(as.matrix)
  trees = am %>% map(graph_from_edgelist)

  if(return_admix) {
  admedges = am %>% map(~as_tibble(.) %>% filter(to %in% colnames(admixmat)) %>%
                          rowwise %>% mutate(from = identify_edge(graph, from, to)) %>% ungroup)
  }
  out = trees %>% map(~prune_extra_leaves(., leaves))
  if(simplify) out %<>% map(simplify_graph)
  out %<>% enframe(value = 'graph')
  if(return_admix) out %<>% mutate(admedges) #%>% rowwise %>% mutate(evec = list(sort(paste(edges[,1], ' ' , edges[,2])))) %>% ungroup
  out
}

prune_extra_leaves = function(graph, keep) {
  leaves = get_leafnames(graph)
  while(length(setdiff(leaves, keep)) > 0) {
    graph = igraph::delete_vertices(graph, setdiff(leaves, keep))
    leaves = get_leafnames(graph)
  }
  graph
}

#' Find all trees within SPR distance of 1 of all graph component trees
#'
#' Returns all trees which can be reached through one iteration of subtree-prune-and-regraft on any graph component tree
#' @export
#' @param graph An admixture graph
#' @return A data frame with all trees
decomposed_tree_neighbors = function(graph) {

  graph %>% simplify_graph %>% graph_splittrees %$% graph %>% map(tree_neighbors) %>%
    bind_rows %>% rename(graph = itree) %>%
    mutate(isoclass = isomorphism_classes2(graph)) %>%
    filter(!duplicated(isoclass)) %>% select(-isoclass)
}


#' Find all trees within SPR distance of 1
#'
#' Returns all trees which can be reached through one iteration of subtree-prune-and-regraft
#' @export
#' @param tree A tree in `igraph` format
#' @return A data frame with all trees
tree_neighbors = function(tree) {
  # returns nested data frame with all trees within edit distance 1
  root = get_root(tree)
  gen1 = neighbors(tree, root, mode = 'out')
  nodes = difference(V(tree), c(root, gen1))
  map(nodes, ~tree_neighbors_single(tree, .)) %>% bind_rows
}


tree_neighbors_single = function(tree, node) {
  # returns nested data frame with all trees within edit distance 1,
  # that are created by cutting all edges leading to node
  if(class(node) != 'character') node = names(node)
  downstream = subcomponent(tree, node, mode = 'out')
  root = get_root(tree)
  outgroup = neighbors(tree, root, mode = 'out') %>% igraph::intersection(get_leaves(tree))
  parent = neighbors(tree, node, mode = 'in')
  sibling = neighbors(tree, parent, mode = 'out')
  from = names(difference(V(tree), c(downstream, root, outgroup, parent, sibling)))
  tibble(from) %>% mutate(to = node, itree = map2(from, to, ~replace_treeedge(tree, .x, .y)))
}

replace_treeedge = function(tree, from, to) {
  # removes edge leading to node 'to', and adds edge beginning above node 'from'
  parent = names(neighbors(tree, to, mode = 'in'))
  grandparent = names(neighbors(tree, parent, mode = 'in'))
  sibling = setdiff(names(neighbors(tree, parent, mode = 'out')), to)
  newgrandparent = names(neighbors(tree, from, mode = 'in'))
  tree %>%
    add_edges(c(grandparent, sibling)) %>%
    igraph::delete_vertices(parent) %>%
    add_vertices(1, name = parent) %>%
    delete_edges(paste(newgrandparent, from, sep = '|')) %>%
    add_edges(c(parent, to, newgrandparent, parent, parent, from))
}

graph_plusone_old = function(graph) {
  # returns all graphs with one more edge
  desimplify = !is_simplified(graph)
  if(desimplify) graph %<>% simplify_graph

  newedges = find_newedges_cautiously(graph)
  fn = ~insert_admix_old(graph, .x, .y)
  if(desimplify) fn %<>% compose(desimplify_graph, .dir = 'forward')
  newedges %>% mutate(graph = map2(from, to, fn))
}

#' Find all graphs which result from adding one admixture edge
#'
#' @export
#' @param graph Admixture graph in `igraph` format
#' @param ntry Specify this to return only a subset of all possible graphs with one more edge
#' @return A data frame with columns `from`, `to`, and `graph`
#' @examples
#' \dontrun{
#' newgraphs = graph_plusone(example_igraph)
#' # now evaluate the new graphs
#' newgraphs %>%
#'   rowwise %>%
#'   mutate(res = list(qpgraph(example_f2_blocks, graph))) %>%
#'   unnest_wider(res)
#' }
graph_plusone = function(graph, ntry = Inf) {
  # returns all graphs with one more edge

  desimplify = !is_simplified(graph)
  if(desimplify) graph %<>% simplify_graph
  out = graph %>% find_newedges
  if(is.finite(ntry)) out %<>% slice_sample(n = ntry, replace = TRUE)
  out %<>% rowwise %>%
    mutate(g = list(insert_admix(graph, source_from, source_to, dest_from, dest_to)))
  if(desimplify) out %<>% mutate(g = list(desimplify_graph(g)))
  out %>% ungroup %>% rename(graph = g)
}

#' Find all graphs which result from removing one admixture edge
#'
#' @export
#' @param graph Admixture graph in `igraph` format
#' @return A data frame with columns `from`, `to`, and `graph`
#' @examples
#' \dontrun{
#' newgraphs = graph_minusone(example_igraph)
#' # now evaluate the new graphs
#' newgraphs %>%
#'   rowwise %>%
#'   mutate(res = list(qpgraph(example_f2_blocks, graph))) %>%
#'   unnest_wider(res)
#' }
graph_minusone = function(graph, ntry = Inf) {
  # returns all graphs with one admixture edge removed
  desimplify = !is_simplified(graph)
  if(desimplify) graph %<>% simplify_graph
  # fn = ~delete_edges(graph, paste(.x, .y, sep = '|')) %>%
  #   igraph::delete_vertices(setdiff(get_leafnames(.), get_leafnames(graph))) %>%
  #   simplify_graph
  fn = ~delete_admix(graph, .x, .y)
  if(desimplify) fn %<>% compose(desimplify_graph, .dir = 'forward')
  graph %>% find_admixedges %>% slice_sample(n = min(100, ntry), replace = TRUE) %>%
    mutate(graph = map2(from, to, fn))
}

#' Find all graphs which result from adding and removing one admixture edge
#'
#' @export
#' @param graph Admixture graph in `igraph` format
#' @return A data frame with columns `source_from`, `source_to`, `dest_from`, `dest_to`, and `graph`
#' @examples
#' \dontrun{
#' newgraphs = graph_minusplus(example_igraph)
#' # now evaluate the new graphs
#' newgraphs %>%
#'   rowwise %>%
#'   mutate(res = list(qpgraph(example_f2_blocks, graph))) %>%
#'   unnest_wider(res)
#' }
graph_minusplus = function(graph) {
  graph %>% graph_minusone %>%
    mutate(graph2 = map(graph, graph_plusone)) %>%
    select(-graph) %>% unnest(graph2) %>%
    mutate(isoclass = isomorphism_classes2(graph)) %>%
    filter(!duplicated(isoclass)) %>% select(-isoclass)
}

graph_plusn = function(graphlist, n = 1, ntry = Inf) {
  if(n == 0) return(graphlist)
  map(graphlist, ~graph_plusone(., ntry = ntry)$graph) %>%
    flatten %>% graph_plusn(n-1, ntry = 1)
}

graph_minusn = function(graphlist, n = 1, ntry = Inf) {
  if(n == 0) return(graphlist)
  map(graphlist, ~graph_minusone(., ntry = ntry)$graph) %>%
    flatten %>% graph_minusn(n-1, ntry = 1)
}

#' Find all valid graphs which result from flipping one admixture edge
#'
#' @export
#' @param graph Admixture graph in `igraph` format
#' @return A data frame with columns `from`, `to`, and `graph`
#' @examples
#' \dontrun{
#' newgraphs = graph_flipadmix(example_igraph)
#' # now evaluate the new graphs
#' newgraphs %>%
#'   rowwise %>%
#'   mutate(res = list(qpgraph(example_f2_blocks, graph))) %>%
#'   unnest_wider(res)
#' }
graph_flipadmix = function(graph) {

  desimplify = !is_simplified(graph)
  if(desimplify) graph %<>% simplify_graph
  admixedges = graph %>% find_admixedges
  el = igraph::as_edgelist(graph)
  fn = ~flipadmix(el, .x, .y)
  out = admixedges %>%
    mutate(graph = map2(from, to, fn)) %>%
    filter(!map_lgl(graph, is.null))
  if(nrow(out) == 0) return()
  if(desimplify) out %<>% mutate(graph = map(graph, desimplify_graph))
  out
}

flipadmix = function(graph, from, to) {

  if('igraph' %in% class(graph)) edges = as_edgelist(graph)
  else edges = graph
  edges %<>% as.matrix
  row = which(edges[,1] == from & edges[,2] == to)
  edges[row,] = c(to, from)
  g = igraph::graph_from_edgelist(edges)
  # if(!is.dag(g) ||
  #    max(degree(g, mode = 'in')) > 2 ||
  #    max(degree(g, mode = 'out')) > 2 ||
  #    max(degree(g, mode = 'all')) > 3) g = NULL
  if(!is_valid(g)) g = NULL
  g
}

flipadmix2 = function(graph, from, to) {

  if('igraph' %in% class(graph)) edges = as_edgelist(graph)
  else edges = graph
  edges %<>% as.matrix
  row1 = which(edges[,1] == from & edges[,2] == to)
  row2 = which(edges[,2] == from)[1]
  edges[row1,] = c(to, from)
  g = igraph::graph_from_edgelist(edges)
  if(!is.na(row2)) edges[row2,] = rev(edges[row2,])
  g2 = igraph::graph_from_edgelist(edges)
  if(is_valid(g2)) return(g2)
  if(is_valid(g)) return(g)
}

#' Add a population to an admixture graph
#'
#' @export
#' @param graph An admixture graph
#' @param pop Population to add to the graph
#' @return Admixture graph with the added population
#' @seealso \code{\link{insert_leaf}} adds pop at a specific position
graph_addleaf = function(graph, pop) {
  graph %>%
    find_normedges(exclude_first = TRUE) %>%
    mutate(graph = map2(from, to, ~insert_leaf(graph, pop, .x, .y)))
}



#' Find possible new edges
#'
#' @param graph An admixture graph
#' @return A data frame with columns `from` and `to`. New edges which begin above `from`
#'   and end above `to` could be inserted
#' @export
#' @seealso \code{\link{find_normedges}} \code{\link{find_admixedges}}
find_newedges = function(graph, fix_outgroup = TRUE, all = TRUE) {
  # edge pairs are defined by 4 vertex names
  # todo: implement remove_redundant

  if(!all) return(find_newedges_cautiously(graph))
  dmat = igraph::distances(graph, mode = 'out')
  edges = graph %>% as_edgelist %>% as_tibble(.name_repair = ~c('source_from', 'source_to'))
  if(fix_outgroup) {
    root = get_rootname(graph)
    outpop = get_outpop(graph)
    if(!is.null(outpop)) edges %<>% filter(source_from != root | source_to != outpop)
  }
  edges %>%
    expand_grid(rename(edges, dest_from=source_from, dest_to=source_to)) %>%
    filter(source_from != dest_from, source_to != dest_to, source_to != dest_from, dest_to != source_from) %>%
    rowwise %>% filter(!is.finite(dmat[dest_to, source_from])) %>% ungroup
}

find_newedges_cautiously = function(graph) {
  # returns a two column data frame with edges that can be inserted into the graph

  edges = matrix(NA, 0, 2)
  root = get_root(graph)
  firstgen = neighbors(graph, root, mode='out')
  admix = V(graph)[(degree(graph, mode='in') > 1)]
  admixchildren = do.call(c, igraph::ego(graph, 1, admix, mode='out'))
  admixparents = do.call(c, igraph::ego(graph, 1, admix, mode='in'))
  admixsiblings = do.call(c, igraph::ego(graph, 1, admixparents, mode='out'))
  exclude = c(admix, firstgen, root)
  if(!is.null(admixchildren)) exclude = c(exclude, admixchildren)
  cand_from = igraph::difference(V(graph), exclude)
  for(fromnode in names(cand_from)) {
    upstream = subcomponent(graph, fromnode, mode='in')
    downstream1 = neighbors(graph, fromnode, mode='out')
    siblings = neighbors(graph, upstream[2], mode='out') # upstream[2] should be parent of fromnode
    uncles = neighbors(graph, neighbors(graph, upstream[2], mode='in'), mode='out')
    exclude = c(upstream, downstream1, siblings, uncles)
    if(!is.null(admixsiblings)) exclude = c(exclude, admixsiblings)
    cand_to = names(igraph::difference(cand_from, exclude))
    if(length(cand_to) > 0) edges %<>% rbind(cbind(fromnode, cand_to))
  }
  edges %>% as_tibble(.name_repair = ~c('from', 'to'))
}


#' Find admixture edges
#'
#' @export
#' @param graph An admixture graph
#' @return A data frame with columns `from` and `to` with admixture edges
#' @seealso \code{\link{find_normedges}} \code{\link{find_newedges}}
find_admixedges = function(graph) {
  # returns a two column data frame with edges that can be removed from the graph

  edges = matrix(NA, 0, 2)
  admixnodes = which(degree(graph, mode = 'in') == 2)
  if(length(admixnodes) == 0) return(tibble(from = character(), to = character()))
  graph %>% adjacent_vertices(admixnodes, mode = 'in') %>%
    map(names) %>% enframe('to', 'from') %>%
    unnest_longer(from) %>% select(2:1)
}

#' Find drift edges
#'
#' @export
#' @param graph An admixture graph
#' @param exclude_first Do not return edge from root to outgroup
#' @return A data frame with columns `from` and `to` with drift edges
#' @seealso \code{\link{find_newedges}} \code{\link{find_admixedges}}
find_normedges = function(graph, exclude_first = FALSE) {
  # returns a two column data frame with non-admixture edges
  el = graph %>% as_edgelist
  if(exclude_first) el = el[-1,]
  el %>% as_tibble(.name_repair = ~c('from', 'to')) %>%
    group_by(to) %>% filter(n() == 1) %>% ungroup
}

#' Delete an admixture edge
#'
#' @export
#' @param graph An admixture graph
#' @param from Edge source node
#' @param to Edge target node
#' @return Admixture graph with one deleted edge
#' @seealso \code{\link{insert_admix}}
delete_admix = function(graph, from = NULL, to = NULL) {
  # returns graph with admixture edge deleted
  # does not conserve internal node names

  leaves = sort(get_leafnames(graph))
  desimplify = !is_simplified(graph)
  ograph = graph
  if(is.null(from)) {
    if(desimplify) graph %<>% simplify_graph
    if(!is_valid(graph)) stop('delete admix error')
    admix = graph %>% find_admixedges
    if(nrow(admix) == 0) {
      stop("No admix edges found!")
    } else {
      admix %<>% slice_sample(n=1)
    }
    from = admix$from
    to = admix$to
  }
  parents = neighbors(graph, from, mode = 'in')
  if(length(parents) == 1 && degree(graph, from) == 2) {
    del = from
    if(length(neighbors(graph, parents, mode = 'in')) == 2) del = c(del, names(parents))
    graph %<>% igraph::delete_vertices(del)
  } else {
    graph %<>% delete_edges(paste(from, to, sep = '|'))
  }
  graph %<>% simplify_graph
  newleaves = setdiff(get_leafnames(graph), leaves)
  while(length(newleaves) > 0) {
    g = graph %>% igraph::delete_vertices(newleaves) %>% simplify_graph()
    newleaves = setdiff(get_leafnames(g), leaves)
    graph = g
  }
  if(degree(graph, get_root(graph), mode = 'out') == 1) {
    graph %<>% igraph::delete_vertices(get_root(graph))
  }
  if(desimplify) graph %<>% desimplify_graph
  if(!isTRUE(all.equal(sort(get_leafnames(graph)), leaves)) || !is_valid(graph)) stop('xxxx')
  graph
}


#' Remove population from graph
#'
#' @export
#' @param graph An admixture graph
#' @param leaf Population to be removed
#' @return Admixture graph with removed population
#' @seealso \code{\link{insert_leaf}}
delete_leaf = function(graph, leaf) {
  # deletes leaf and all internal nodes leading to no other leaves
  desimplify = !is_simplified(graph)
  if(desimplify) graph %<>% simplify_graph
  leaves = get_leafnames(graph)
  graph %<>%
    igraph::subcomponent(leaf, mode = 'in') %>%
    keep(~length(intersect(leaves, names(igraph::subcomponent(graph, .x, mode = 'out')))) == 1) %>%
    igraph::delete_vertices(graph, .) %>%
    simplify_graph()
  if(desimplify) graph %<>% desimplify_graph
  graph
}

delete_leaves = function(graph, leaves) {
  # assumes simplified graph

  keepleaves = graph %>% get_leafnames %>% setdiff(leaves)
  keep_leaves(graph, keepleaves)
}

keep_leaves = function(graph, leaves) {
  # assumes simplified graph

  keepv = map(leaves, ~igraph::subcomponent(graph, ., mode = 'in')) %>% unlist %>% names %>% unique
  delv = setdiff(names(V(graph)), keepv)
  out = graph %>% igraph::delete_vertices(delv)
  out %>% simplify_graph
}


#' Add population to graph
#'
#' @export
#' @param graph An admixture graph
#' @param leaf Population to be added
#' @param from Source node of edge onto which `leaf` should be added
#' @param to Target node of edge onto which `leaf` should be added
#' @return Admixture graph with added population
#' @seealso \code{\link{delete_leaf}}, \code{\link{graph_addleaf}} to add `leaf` at any position
insert_leaf = function(graph, leaf, from, to) {
  # inserts new leaf at edge from -> to
  nn = paste(from, to, sep = '_')
  graph %>%
    add_vertices(2, attr = list('name' = c(nn, leaf))) %>%
    add_edges(c(from, nn, nn, to, nn, leaf)) %>%
    delete_edges(paste(from, to, sep = '|'))
}

#' Generate all trees
#'
#' This functions generates all possible trees with for a given set of leaf nodes.
#'
#' @export
#' @param leaves The leaf nodes
#' @return A list of trees in `igraph` format
generate_all_trees = function(leaves) {

  stopifnot(!'R' %in% leaves)
  if(any(str_detect(leaves, '\\|'))) stop('Leaves cannot have "|" in name!')
  init = graph_from_edgelist(matrix(c('R', leaves[1]), 1))
  add_leaves_rec(init, leaves[-1]) %>% map(~delete_vertices(.,'R'))
}

#' Generate all graphs
#'
#' This functions generates all possible admixture graphs with a set number of admixture events for a given set of leaf nodes. It's pretty slow, and may not terminate in reasonable time for more than 5 leaves and 2 admixture events. The function is similar to the \code{\link[admixturegraph]{all_graphs}} function in the \code{admixturegraph} package, but there are a few differences:
#' \itemize{
#' \item The function does not return graphs with fewer than `nadmix` admixture events
#' \item The function does not return most graphs which are unidentifiable and would have equal fits as simpler identifiable graphs (for example it does not return graphs where a node is expanded to a loop)
#' \item The function does not return duplicated graphs, as identified by the \code{\link{graph_hash}} function
#' \item The function generates unique graphs which are missing in the output of \code{\link[admixturegraph]{all_graphs}}
#' }
#' @export
#' @param leaves The leaf nodes
#' @param nadmix The number of admixture nodes
#' @param verbose Print progress updates
#' @return A list of graphs in `igraph` format
#' @seealso \code{\link[admixturegraph]{all_graphs}}, \code{\link{generate_all_trees}}, \code{\link{graph_hash}}
#' @examples
#' \dontrun{
#' graphs = generate_all_graphs(letters[1:4], 1)
#' }
generate_all_graphs = function(leaves, nadmix = 0, verbose = TRUE) {
  nleaves = length(leaves)
  if(verbose) alert_info(paste0('Generating ', numtrees(nleaves),' trees...\n'))
  trees = generate_all_trees(leaves)
  if(verbose) alert_info(paste0('Adding all possible admixutre edges...\n'))
  graphs = flatten(map(trees, ~add_edges_rec(., nadmix)))
  if(verbose) alert_info(paste0('Identifying isomorpisms in ', length(graphs),' graphs...\n'))
  hashes = map_chr(graphs, graph_hash)
  keep = which(!duplicated(hashes))
  if(verbose) alert_info(paste0('Returning ', length(keep),' unique graphs...\n'))
  graphs[keep]
}

add_leaves_rec = function(tree, leaves) {
  el = tree %>% E %>% as_ids %>% str_split('\\|')
  newtrees = map(el, ~{
    from = .[1]
    to = .[2]
    nam = paste(from, to, sep='_')
    tree %>%
      add_vertices(2, name = c(nam, leaves[1])) %>%
      add_edges(edges = c(nam, leaves[1])) %>%
      add_edges(edges = c(from, nam)) %>%
      add_edges(edges = c(nam, to)) %>%
      delete_edges(paste(from, to, sep = '|'))
  })
  if(length(leaves) == 1) return(newtrees)
  flatten(map(newtrees, ~add_leaves_rec(., leaves[-1])))
}


add_edges_rec = function(graph, nadmix) {
  if(nadmix == 0) return(list(graph))
  newgraphs = graph %>% graph_plusone %>% pull(graph)
  flatten(map(newgraphs, ~add_edges_rec(., nadmix-1)))
}


permutations = function(vec) {

  if(length(vec) == 1) return(vec)
  out = list()
  for(i in seq_len(length(vec))) {
    new = map(permutations(vec[-i]), ~c(vec[i], .))
    out = c(out, new)
  }
  out
}





#' Return all graphs created from permuting a subclade
#'
#' generates new graphs from basegraph as follows:
#' 1. generates all possible trees using `addpops` (which are not in basegraph)
#' 2. attaches trees to connection_edge, which is defined by two nodes in basegraph
#' 3. adds edges originating above each edge in `source_node`, to each node above `addpops`
#'
#' @export
#' @param basegraph an admixture graph as igraph object. (convert from edge list using `igraph::graph_from_edgelist`)
#' @param addpops a vector of population labels which are not in `basegraph`. These populations should form a clade. All possible trees will be generated and those trees will be attached to `basegraph`.
#' @param connection_edge edge in `basegraph` where the tree made from `addpops` should be attached
#' @param source_nodes nodes in `basegraph`. edges above these nodes will be added and attached to all terminal edges leading to `addpops`
#' @examples
#' \dontrun{
  #' graphlist = graphmod_pavel(example_igraph, addpops = c('pop1', 'pop2', 'pop3'),
  #'                            connection_edge = c('N2N0', 'N1N'),
  #'                            source_nodes = c('Denisova.DG', 'N2N2'))
  #' results = tibble(graph = graphlist) %>%
  #'   mutate(res = map(graph, ~qpgraph(example_f2_blocks, .))) %>%
  #'   unnest_wider(res) %>%
  #'   mutate(worstz = map_dbl(f3, ~max(abs(.$z))))
#' }
graphmod_pavel = function(basegraph, addpops, connection_edge, source_nodes) {

  c1 = connection_edge[1]
  c2 = connection_edge[2]
  cn = 'connection_node'
  tn = 'tree_R'

  trees = generate_all_trees(addpops) %>% map(~{
    leaves = get_leaves(.)
    internal = difference(V(.), leaves)
    set_vertex_attr(., 'name', index = internal, value = paste0('tree_', names(internal)))
    })
  graphs = trees %<>% map(~{
    igraph::union(basegraph, .) %>%
      add_vertices(1, name = cn) %>%
      add_edges(c(c1, cn, cn, c2, cn, tn)) %>%
      delete_edges(paste(c1, c2, sep='|'))
    })
  e = expand_grid(source_nodes, addpops)
  # continue here: update this to work with new insert_admix
  graphs %>% map(~insert_edges(., e$source_nodes, e$addpops))
}

#' Split nodes with more than two edges
#'
#' @export
#' @param graph An admixture graph
#' @return A new admixture graph in which no node has more than two incoming or outgoing edges
split_multifurcations = function(graph) {
  # input is graph as edge list matrix
  # output is a four column matrix with 'lower' and 'upper'

  # probably doesn't work for nodes with more than 2 or 3 incoming edges

  ig = class(graph)[1] == 'igraph'
  if(ig) graph %<>% igraph::as_edgelist()

  multi = names(which(table(graph[,1]) > 2))
  if(ncol(graph) == 2) graph = cbind(graph, NA, NA)

  for(i in seq_len(length(multi))) {
    rows = which(graph[,1] == multi[i])
    torows = which(graph[,2] == multi[i])
    newnam = paste0(graph[rows, 1], '_multi', i, '_', seq_along(rows))
    graph[rows, 1] = newnam
    graph[torows, 2] = newnam[1]
    graph %<>% rbind(cbind(newnam[-length(newnam)], newnam[-1], '0', '1e-9'))
  }

  leaves = setdiff(graph[,2], graph[,1])
  leafadmix = intersect(leaves, names(which(table(graph[,2]) >= 2)))
  for(i in seq_len(length(leafadmix))) {
    rows = which(graph[,2] == leafadmix[i])
    newnam = paste0(leafadmix[i], '_pre', i)
    graph[rows, 2] = newnam
    graph %<>% rbind(cbind(newnam, leafadmix[i], NA, NA))
  }

  multiin = names(which(table(graph[,2]) > 2))
  for(i in seq_len(length(multiin))) {
    rows = which(graph[,1] == multiin[i])
    torows = which(graph[,2] == multiin[i])
    newnam = paste0(graph[rows, 1], '_multi', i, '_', seq_len(length(torows)-2))
    #graph[rows, 1] = newnam[1]
    graph[torows[-1], 2] = rep(newnam, each=2)
    graph %<>% rbind(cbind(newnam, graph[rows, 1], '0', '1e-9'))
  }

  graph %<>%
    as_tibble(.name_repair = ~c('from', 'to', 'lower', 'upper')) %>%
    type_convert(col_types = cols())
  if(ig) graph %<>% edges_to_igraph()
  graph
}



#' Returns a signature of a graph consisting of the left and right descendent leaf nodes of each internal node (sorted and concatenated)
#'
#' Can be used to determine how often internal nodes occur in a list of other well fitting models
#' @param graph An admixture graph
#' @return A graph signature as character vector
#' @examples
#' \dontrun{
#' sigs = example_winners %>% mutate(sig = map(igraph, node_signature)) %$%
#'          sig %>% unlist %>% table %>% c
#' node_signature(example_winners$igraph[[1]])
#' }
node_signature = function(graph) {
  graph %<>% simplify_graph
  inner = setdiff(names(V(graph)), get_leafnames(graph))
  inner2 = inner %>%
    map(~neighbors(graph, ., mode = 'out')) %>%
    map(names)
  leaves = get_leafnames(graph)
  map(inner2, ~map(., ~names(subcomponent(graph, ., mode = 'out'))) %>%
        map(~intersect(., leaves)) %>%
        map(sort) %>%
        map(~paste(., collapse=':')) %>%
        flatten_chr) %>%
    map(sort) %>%
    map(~paste(., collapse=' ')) %>%
    flatten_chr %>%
    set_names(inner)
}

#' Count how often each node in graph occurs in other graphs
#'
#' @param graph An admixture graph
#' @param graphlist List of graphs
node_counts = function(graph, graphlist) {
  counts = map(graphlist, node_signature) %>%
    map(unique) %>%
    unlist %>%
    table %>%
    c %>%
    enframe(name = 'signature', value = 'count')
  graph %>%
    node_signature %>%
    enframe(name = 'node', value = 'signature') %>%
    left_join(counts, by = 'signature')
}


match_graphs = function(graph1, graph2) {

  v1 = names(V(graph1))
  v2 = names(V(graph2))
  l1 = get_leafnames(graph1)
  l2 = get_leafnames(graph2)
  stopifnot(all.equal(sort(l1), sort(l2)))

  vmap = cbind(l1, l1, l1)
  for(i in rep(seq_along(v1), 2)) {
    if(v1[i] %in% vmap[,1]) next
    parents = neighbors(graph1, v1[i], mode = 'in')
    children = neighbors(graph1, v1[i], mode = 'out')
    np = length(parents)
    nc = length(children)
    cl = sort(intersect(l1, names(children)))
    indeg = unname(sort(degree(graph1, children, mode = 'in')))
    if(length(cl) > 0) {
      cands = names(neighbors(graph2, cl[1], mode = 'in'))
      for(cand in cands) {
        print(cand)
        cancchildren = neighbors(graph2, cand, mode = 'out')
        cl2 = sort(intersect(l1, names(cancchildren)))
        if(isTRUE(all.equal(cl, cl2))) {
          if(!v1[i] %in% vmap[,1] && !cand %in% vmap[,2]) {
            vmap = rbind(vmap, c(v1[i], cand, v1[i]))
            break
          }
        }
      }
      if(v1[i] %in% vmap[,1]) next
    }
    matchedparents = intersect(vmap[,1], names(parents))
    if(length(matchedparents) == 0) next
    mp2 = vmap[vmap[,1] == matchedparents[1], 2]
    stopifnot(mp2 %in% names(V(graph2)))
    if(length(mp2) == 1 && !is.na(mp2)) {
      cands = names(neighbors(graph2, mp2, mode = 'out'))
    }
    for(cand in cands) {
      print(cand)
      cancchildren = neighbors(graph2, cand, mode = 'out')
      if(length(cl) > 0) {
        cl2 = sort(intersect(l1, names(cancchildren)))
        if(isTRUE(all.equal(cl, cl2))) {
          if(!v1[i] %in% vmap[,1] && !cand %in% vmap[,2]) {
            vmap = rbind(vmap, c(v1[i], cand, v1[i]))
            break
          }
        }
      } else {
        if(!v1[i] %in% vmap[,1] && !cand %in% vmap[,2]) {
          vmap = rbind(vmap, c(v1[i], cand, v1[i]))
          break
        }
        # indeg2 = unname(sort(degree(graph2, names(cancchildren), mode = 'in')))
        # if(isTRUE(all.equal(indeg, indeg2))) {
        #   vmap = rbind(vmap, c(v1[i], cand, v1[i]))
        #   break
        # }
      }
    }
  }

  # add non-matched nodes
  rem1 = setdiff(v1, vmap[,1])
  rem2 = setdiff(v2, vmap[,2])
  vmap = rbind(vmap, cbind(rem1, NA, paste0('newnode1_', 1:length(rem1))))
  vmap = rbind(vmap, cbind(NA, rem2, paste0('newnode2_', 1:length(rem2))))

  # edge lists with new names
  el1 = as_edgelist(graph1)
  el2 = as_edgelist(graph2)
  el1[,1] = vmap[match(el1[,1], vmap[,1]), 3]
  el1[,2] = vmap[match(el1[,2], vmap[,1]), 3]
  el2[,1] = vmap[match(el2[,1], vmap[,2]), 3]
  el2[,2] = vmap[match(el2[,2], vmap[,2]), 3]
  ee1 = paste(el1[,1], el1[,2])
  ee2 = paste(el2[,1], el2[,2])

  comb = rbind(el1, el2)
  comb = comb[!duplicated(comb),]
  in1 = paste(comb[,1], comb[,2]) %in% ee1
  in2 = paste(comb[,1], comb[,2]) %in% ee2
  comb = cbind(comb, ifelse(in1 & in2, 'both', ifelse(in1, 'g1', 'g2')))
  comb
}






leafdistances = function(graph) {

  graph %<>% simplify_graph
  leaves = get_leafnames(graph)
  dist = igraph::distances(graph, mode = 'out') %>% as_tibble(rownames = 'from') %>%
    pivot_longer(-from, names_to = 'to', values_to = 'dist')
  dist2 = dist %>% filter(is.finite(dist), from != to)
  dist2 %>% left_join(dist2, by = 'from') %>% filter(to.x != to.y, to.x %in% leaves, to.y %in% leaves) %>% mutate(pp = paste(to.x, to.y)) %>% group_by(pp) %>% top_n(1, -jitter(dist.y)) %>% ungroup %>% transmute(from = to.y, to = to.x, dist = dist.y)

}

reconstruct_from_leafdist = function(leafdist) {

  leaves = union(leafdist$from, leafdist$to)
  pref = shortest_unique_prefixes(leaves)

  parents = leafdist %>% group_by(to) %>% filter(dist == 1) %>% mutate(node = paste0(to, '_p1'), dist = 1, d = list(union(from, to))) %>% rowwise %>% mutate(dnum = length(d), desc = paste(sort(d), collapse = ' '), from = to) %>% ungroup %>% select(node, dist, from, d, dnum, desc) %>% distinct %>% group_by(desc) %>% top_n(1, node) %>% ungroup

  # merge with desc from parent generation
  leafdist %>% filter(from %in% parents$from) %>% group_by(to) %>% filter(dist == 2) %>% mutate(node = paste0(to, '_p2'), dist = 2, desc = paste(sort(union(from, to)), collapse = ' '), dnum = length(union(from, to)), from = to) %>% ungroup %>% select(node, dist, from, dnum, desc) %>% distinct %>% group_by(desc) %>% top_n(1, node) %>% ungroup



  parents = leafdist %>% filter(dist == 1) %>% group_by(from) %>% mutate(d = list(union(from, to))) %>% rowwise %>% mutate(dnum = length(d), desc = paste(sort(shortest_unique_prefixes(d)), collapse = '_')) %>% ungroup %>% select(from, dist, d, dnum, desc) %>% distinct

  p2 = leafdist %>% filter(dist == 2) %>% filter(from %in% parents$from) %>% group_by(from) %>% mutate(d = list(union(from, to))) %>% rowwise %>% mutate(dnum = length(d), desc = paste(sort(shortest_unique_prefixes(d)), collapse = '_')) %>% ungroup %>% select(from, dist, d, dnum, desc) %>% distinct

  p2 %>% left_join(parents %>% select(from, d), by = 'from') %>% rowwise %>% mutate(d = list(union(to, d)), dnum = length(d), desc = paste(sort(shortest_unique_prefixes(d)), collapse = '_')) %>% ungroup


  graph = graph.empty()
  graph %<>% add_vertices(length(leaves), name = leaves)
  for(i in seq_along(leaves)) {
    l = leaves[i]
    ld = leafdist %>% filter(from == l)
    reachable = l
    cat(paste0(i,'\r'))
    for(j in seq_len(max(ld$dist))) {
      nn = ld %>% filter(dist == j)
      reachable = union(reachable, nn$to)
      oldnam = if(j == 1) l else newnam
      newnam = shortest_unique_prefixes(reachable) %>% sort %>% paste(collapse = '_') %>% paste0('_', .)
      if(!newnam %in% names(V(graph))) graph %<>% add_vertices(1, name = newnam)
      if(oldnam != newnam) graph %<>% add_edges(c(newnam, oldnam))
      if(nrow(nn) == 0) break
    }
  }


}



triples_to_tree = function() {
  # takes choose(npop, 3) population triples, and assembles a tree

}



f4_to_triples = function(f2_blocks, outgroup) {
  # infers possible 3 population trees (rooted; 4 pops with outpop) from fstats

  pops = dimnames(f2_blocks)[[1]]
  pp = setdiff(pops, outgroup)
  f4stats = f4(example_f2_blocks, pop1 = outgroup, pop2 = pp, pop3 = pp, pop4 = pp) %>%
    filter(pop2 != pop3, pop2 != pop4, pop3 < pop4)
  outprobs = f4stats %>% rowwise %>% mutate(pp = list(sort(c(pop2, pop3, pop4))), p1 = pp[[1]], p2 = pp[[2]], p3 = pp[[3]], out = ifelse(z < 0, pop4, pop3), oz = abs(z)) %>% group_by(p1, p2, p3, out) %>% summarize(oz = mean(oz))

  tripletrees = outprobs %>% top_n(1, oz)

}

tripletrees_to_pairlists = function() {
  # takes one tree for each triple and uses them to characterize each pop pair

  dat = expand_grid(pop1 = pp, pop2 = pp) %>% filter(pop1 < pop2) %>% rowwise %>% mutate(left = list(NULL), right = list(NULL), out = list(NULL))

  t1 = tripletrees %>% rename(l = p1, r = p2, x = p3) %>% group_by(l, r) %>%
    summarize(o = list(x[out == x]), left = list(x[out == l]), right = list(x[out == r]))
  t2 = tripletrees %>% rename(l = p1, r = p3, x = p2) %>% group_by(l, r) %>%
    summarize(o = list(x[out == x]), left = list(x[out == l]), right = list(x[out == r]))
  t3 = tripletrees %>% rename(l = p2, r = p3, x = p1) %>% group_by(l, r) %>%
    summarize(o = list(x[out == x]), left = list(x[out == l]), right = list(x[out == r]))

  t1 = tripletrees %>% rename(l = p1, r = p2, x = p3)
  t2 = tripletrees %>% rename(l = p1, r = p3, x = p2)
  t3 = tripletrees %>% rename(l = p2, r = p3, x = p1)
  tt = bind_rows(t1, t2, t3) %>% group_by(l, r) %>%
    summarize(o = list(x[out == x]), left = list(x[out == l]), right = list(x[out == r])) %>%
    rowwise %>% mutate(lo = length(o), lleft = length(left), lright = length(right))

}


tree_to_triplesig = function(tree) {
  # takes an igraph tree and returns a data frame with columns lr, o, present
  # output sorted by lr, o

  leaves = sort(get_leafnames(tree))
  internal = setdiff(names(igraph::V(tree)), leaves)
  mat = map(internal, ~leaves %in% names(igraph::subcomponent(tree, ., mode = 'out'))) %>%
    do.call(rbind, .) %>% set_colnames(leaves) %>% set_rownames(internal)
  cmb = combn(leaves,2)
  (mat[,cmb[1,]] & mat[,cmb[2,]]) %>%
    set_colnames(paste(cmb[1,], cmb[2,], sep = ' ')) %>%
    as_tibble(rownames = 'internal') %>%
    mutate(num = 1:n()) %>%
    pivot_longer(-c(internal, num), names_to = 'lr', values_to = 'reach') %>%
    filter(reach) %>%
    group_by(lr) %>%
    slice_max(num) %>%
    ungroup %>%
    expand_grid(o = leaves) %>%
    separate(lr, c('l', 'r'), sep = ' ', remove = F) %>%
    filter(o != l, o != r) %>%
    rowwise %>%
    mutate(inside = is.finite(igraph::distances(tree, internal, o, mode = 'out')[,1])) %>%
    ungroup %>%
    transmute(lr, o, inside) %>%
    arrange(lr, o)
}

graph_to_triplesig = function(graph) {

  graph %>%
    simplify_graph %>%
    graph_splittrees %>% pluck(2) %>%
    map(tree_to_triplesig) %>%
    bind_rows(.id = 'tree') %>%
    group_by(lr, o) %>%
    summarize(inside = any(inside)) %>%
    ungroup
}
# triples don't uniquely identify graphs


eval_plusnadmix = function(graph, qpgfun, n = 1, ntry = Inf, nonzero_f4 = NULL, admix_constraints = NULL,
                           event_order = NULL, verbose = TRUE) {

  newgraphs = tibble(graph = graph_plusn(list(graph), n = n, ntry = ntry))
  num = nrow(newgraphs)
  #newgraphs %<>% slice_sample(n = ntry)
  if(verbose) alert_info(paste0('Evaluating ', nrow(newgraphs), ' graphs...\n'))
  if(nrow(newgraphs) == 0) return(tibble())
  newgraphs %<>%
    rowwise %>%
    filter(satisfies_constraints(graph, nonzero_f4 = nonzero_f4, admix_constraints = admix_constraints,
                                 event_order = event_order)) %>% ungroup
  if(nrow(newgraphs) == 0) return(tibble())
  newgraphs %>%
    mutate(res = furrr::future_map(graph, qpgfun, .progress = verbose, .options = furrr::furrr_options(seed = TRUE))) %>%
    unnest_wider(res) %>% arrange(score)
  #%>% select(source_from, source_to, dest_from, dest_to, graph, score)
}

eval_minusnadmix = function(graph, qpgfun, n = 1, ntry = Inf, nonzero_f4 = NULL,
                            admix_constraints = NULL, event_order = NULL, verbose = TRUE) {

  newgraphs = tibble(graph = graph_minusn(list(graph), n = n, ntry = ntry))
  if(verbose) alert_info(paste0('Evaluating ', nrow(newgraphs), ' graphs...\n'))
  if(nrow(newgraphs) == 0) return(tibble())
  newgraphs %<>%
    rowwise %>%
    filter(satisfies_constraints(graph, nonzero_f4 = nonzero_f4, admix_constraints = admix_constraints,
                                 event_order = event_order)) %>% ungroup
  if(nrow(newgraphs) == 0) return(tibble())
  newgraphs %>%
    mutate(res = furrr::future_map(graph, qpgfun, .progress = verbose, .options = furrr::furrr_options(seed = TRUE))) %>%
    unnest_wider(res) %>% arrange(score)
}

eval_plusminusn = function(graph, qpgfun, n = 1, ntry = Inf, nonzero_f4 = NULL,
                           admix_constraints = NULL, event_order = NULL, verbose = TRUE) {

  if(verbose) alert_info(paste0('Adding ', n, ' edge(s)...\n'))
  plus = eval_plusnadmix(graph, qpgfun, n = n, ntry = ntry, nonzero_f4 = nonzero_f4,
                         admix_constraints = admix_constraints, event_order = event_order, verbose = verbose)
  if(verbose) alert_info(paste0('Best score: ', round(plus$score[[1]], 3), '\n'))

  if(verbose) alert_info(paste0('Removing ', n, ' edge(s)...\n'))
  plus %>% slice_min(score, with_ties = FALSE) %>% pull(graph) %>% pluck(1) %>%
    eval_minusnadmix(qpgfun, n = n, ntry = Inf, nonzero_f4 = nonzero_f4,
                     admix_constraints = admix_constraints, event_order = event_order, verbose = verbose)
}


eval_plusonepop = function(graph, pop, qpgfun, ntry = Inf, verbose = TRUE) {

  root = get_rootname(graph)
  outgroup = intersect(names(neighbors(graph, root)), get_leafnames(graph))
  newgraphs = graph %>% as_edgelist() %>% as_tibble(.name_repair = ~c('from', 'to'))
  if(length(outgroup) > 0) newgraphs %<>% filter(from != root | to != outgroup)
  newgraphs %<>% mutate(graph = map2(from, to, ~insert_leaf(graph, pop, .x, .y)))

  if(verbose) alert_info(paste0('Found ',nrow(newgraphs),' graphs. Evaluating ', min(nrow(newgraphs), ntry), '...\n'))
  newgraphs %>%
    slice_sample(n = min(100, ntry), replace = TRUE) %>%
    mutate(res = furrr::future_map(graph, qpgfun, .progress = verbose, .options = furrr::furrr_options(seed = TRUE))) %>%
    unnest_wider(res) %>% arrange(score) %>% select(from, to, graph, score)
}

evaluate_moreadmix = function(graph, qpgfun, maxadmix, ntry = Inf, verbose = TRUE) {
# adds admixture events to graph one by one, always picking the best one out of ntry randomly chosen possible ones

  nadm = numadmix(graph)
  if(nadm >= maxadmix) stop(paste0('Graph already has ', nadm, ' admixture events!'))

  for(i in seq_len(maxadmix - nadm)) {
    if(verbose) alert_info(paste0('Testing graphs with ',nadm+i,' admixture events...\n'))
    newgraphs = graph %>% eval_plusoneadmix(qpgfun, ntry = ntry)
    # if(nrow(newgraphs) > 0) {
      if(verbose) alert_info(paste0('Best score: ', round(min(newgraphs$score), 3),'\n'))
      graph = newgraphs %>% slice_min(score) %>% pull(graph) %>% pluck(1)
    # } else {
    #   warning("Can't add more admixture events!")
    #   break
    # }
  }
  graph
}


rearrange_negdrift = function(graph, from, to) {
  # assumes graph is simplified, edge is regular edge

  sib = graph %>% neighbors(from, mode = 'out') %>% names %>% setdiff(to)
  children = graph %>% neighbors(to, mode = 'out') %>% names %>% sample
  if(length(children) == 0 || length(sib) == 0) {
    return(graph)
  }
  og = get_outpop(graph)
  gnew = graph %>%
    add_edges(c(to, sib, from, children[1])) %>%
    delete_edges(paste(c(from, to), c(sib, children[1]), sep = '|'))
  if(is_valid(gnew) && (is.null(og) || isTRUE(og == get_outpop(gnew)))) return(gnew)
}


find_and_rearrange_negdrift = function(graph, qpgfun) {
  # finds most negative drift edge and returns rearranged graph

  leaves = get_leafnames(graph)
  e = qpgfun(graph)$edges %>% filter(type == 'edge', !to %in% leaves) %>% slice_min(weight, with_ties = FALSE)
  if(nrow(e) == 0) stop('No negative edge found!')
  rearrange_negdrift(graph, e$from, e$to)
}


rearrange_negadmix1 = function(graph, from, to) {
  # rearranges negative admix edge by flipping its direction if possible
  newgraph = flipadmix(graph, from, to)
  if(is_valid(newgraph)) return(newgraph)
}

rearrange_negadmix2 = function(graph, from, to) {
  # rearranges negative admix edge by flipping its counterpart's direction if possible

  parent_pos = graph %>% neighbors(to, mode = 'in') %>% names %>% setdiff(from, .)
  newgraph = flipadmix(graph, parent_pos, to)
  if(is_valid(newgraph)) return(newgraph)
}


rearrange_negadmix3 = function(graph, from, to) {
  # rearranges negative admix edge by
  # attaching admixed node to edge above parent of pos weight; neg edge is attached to parent node if possible

  leaves = sort(get_leafnames(graph))
  parent_neg = from
  parent_pos = graph %>% neighbors(to, mode = 'in') %>% names %>% setdiff(parent_neg)
  grandparent_pos = graph %>% neighbors(parent_pos, mode = 'in') %>% names
  if(length(grandparent_pos) != 1) return()
  newgraph = graph %>%
    add_edges(c(grandparent_pos, to, to, parent_pos)) %>%
    delete_edges(paste(c(grandparent_pos, parent_pos, parent_neg), c(parent_pos, to, to), sep = '|'))
  if(!is_valid(newgraph)) {
    return()
  }
  if(length(parent_neg) != 1 || length(parent_pos) != 1) stop('aaa')
  newgraph2 = newgraph %>% add_edges(c(parent_neg, parent_pos)) %>% simplify_graph
  if(is_valid(newgraph2)) newgraph = newgraph2
  else newgraph %<>% find_newedges %>% slice_sample(n=1) %>%
    mutate(g = list(insert_admix(newgraph, source_from, source_to, dest_from, dest_to))) %>%
    pull(g) %>% pluck(1) %>% simplify_graph
  leaves2 = sort(get_leafnames(newgraph))
  if(!isTRUE(all.equal(leaves, leaves2)) || !is_valid(newgraph)) return()
  nold = numadmix(graph)
  nnew = numadmix(newgraph)
  if(nnew < nold) newgraph %<>% insert_admix_n(nold - nnew)
  if(is_valid(newgraph)) return(newgraph)
}


#' Find well fitting admixture graphs
#'
#' This function generates and evaluates admixture graphs in `numgen` iterations
#' to find well fitting admixturegraphs.
#' @export
#' @param data Input data in one of three forms:
#' \enumerate{
#' \item A 3d array of blocked f2 statistics, output of \code{\link{f2_from_precomp}} or \code{\link{f2_from_geno}}
#' \item A directory which contains pre-computed f2-statistics
#' \item The prefix of genotype files
#' }
#' @param numadmix Number of admixture events within each graph. (Only relevant if `initgraph = NULL`)
#' @param outpop Name of the outgroup population
#' @param numgraphs Number of graphs in each generation
#' @param stop_gen Total number of generations after which to stop
#' @param stop_gen2 Number of generations without improvement after which to stop
#' @param stop_sec Number of seconds after which to stop
#' @param stop_score Stop once this score has been reached
#' @param initgraph Graph to start with. If it is specified, `numadmix` and `outpop` will be inferred from this graph.
#' @param mutfuns Functions used to modify graphs. Defaults to the following:
#' \itemize{
#' \item \code{\link{spr_leaves}}: Subtree prune and regraft leaves. Cuts a leaf node and attaches it
#' to a random other edge in the graph.
#' \item \code{\link{spr_all}}: Subtree prune and regraft. Cuts any edge and attaches the new orphan node
#' to a random other edge in the graph, keeping the number of admixture nodes constant.
#' \item \code{\link{swap_leaves}}: Swaps two leaf nodes.
#' \item \code{\link{move_admixedge_once}}: Moves an admixture edge to a nearby location.
#' \item \code{\link{flipadmix_random}}: Flips the direction of an admixture edge (if possible).
#' \item \code{\link{mutate_n}}: Apply `n` of the mutation functions in this list to a graph (defaults to 2).
#' }
#' @param opt_worst_residual Optimize for lowest worst residual instead of best score. `FALSE` by default, because the likelihood score is generally a better indicator of the quality of the model fit, and because optimizing for the lowest worst residual is slower (because f4-statistics need to be computed).
#' @param return_searchtree Return the search tree in addition to the models. Output will be a list with three items: models, search tree, search tree as data frame
#' @param plusminus_generations If the best score does not improve after `plusminus_generations` generations, another approach to improving the score will be attempted: A number of graphs with on additional admixture edge will be generated and evaluated. The resulting graph with the best score will be picked, and new graphs will be created by removing any one admixture edge (bringing the number back to what it was originally). The graph with the lowest score will then be selected. This often makes it possible to break out of local optima, but is slower than regular graph modifications.
#' If the current number of admixture events is lower than `max_numadmix`, the last step (removing an admixture edge) will be skipped.
#' @param admix_constraints A data frame with constraints on the number of admixture events for each population.
#' See \code{\link{satisfies_numadmix}}
#' As soon as one graph happens to satisfy these constraints, all subsequently generated graphs will be required to also satisfy them.
#' @param event_constraints A data frame with constraints on the order of events in an admixture graph.
#' See \code{\link{satisfies_eventorder}}
#' As soon as one graph happens to satisfy these constraints, all subsequently generated graphs will be required to also satisfy them.
#' @param reject_f4z If this is a number greater than zero, all f4-statistics with `abs(z) > reject_f4z` will be used to constrain the search space of admixture graphs: Any graphs in which f4-statistics greater than `reject_f4z` are expected to be zero will not be evaluated.
#' @param max_admix Maximum number of admixture edges. By default, this number is equal to `numadmix`, or to the number of admixture edges in `initgraph`, so the number of admixture edges stays constant. Setting this to a higher number will lead to more admixture edges being added occasionally (see `plusminus_generations`). Graphs with additional admixture edges will only be accepted if they improve the score by 5% or more.
#' @param verbose Print progress updates
#' @param ... Additional arguments passed to \code{\link{qpgraph}}
#' @return A nested data frame with one model per line
#' @seealso \code{\link{qpgraph}}, \code{\link{find_graphs_old}}
#' @examples
#' \dontrun{
#' res = find_graphs(example_f2_blocks, numadmix = 2)
#' res %>% slice_min(score)
#' }
#' \dontrun{
#' # Start with a graph with 0 admixture events, increase up to 3, and stop after 10 generations of no improvement
#' pops = dimnames(example_f2_blocks)[[1]]
#' initgraph = random_admixturegraph(pops, 0, outpop = 'Chimp.REF')
#' res = find_graphs(example_f2_blocks, initgraph = initgraph, stop_gen2 = 10, max_admix = 3)
#' res %>% slice_min(score)
#' }
find_graphs = function(data, numadmix = 0, outpop = NULL, stop_gen = 100, stop_gen2 = 15, stop_score = 0, stop_sec = NULL,
                       initgraph = NULL, numgraphs = 10,
                       mutfuns = namedList(spr_leaves, spr_all, swap_leaves, move_admixedge_once, flipadmix_random,
                                            place_root_random, mutate_n),
                       opt_worst_residual = FALSE, plusminus_generations = 5, return_searchtree = FALSE,
                       admix_constraints = NULL, event_constraints = NULL, reject_f4z = 0, max_admix = numadmix,
                       verbose = TRUE, ...) {

  nodups = TRUE
  traceback_gen = Inf

  f2_blocks = get_f2(data)
  if(is.null(initgraph)) {
    pops = dimnames(f2_blocks)[[1]]
    graph = random_admixturegraph(pops, numadmix, outpop = outpop)
  } else {
    graph = initgraph
    numadmix = numadmix(graph)
  }
  ell = list(...)
  if(!all(names(ell) %in% names(formals(qpgraph)))) {
    notused = setdiff(names(ell), names(formals(qpgraph)))
    stop(paste0("The following arguments are not recognized: '", paste0(notused, collapse = "', '"), "'"))
  }
  if('f3basepop' %in% names(ell)) {
    f3basepop = ell$f3basepop
  } else {
    f3basepop = get_leafnames(graph)[1]
  }
  if('numstart' %in% names(ell)) {
    numstart = ell$numstart
  } else {
    numstart = 1
  }
  if(!isFALSE(opt_worst_residual)) {
    qpgfun = function(graph, ...) {
      res = qpgraph(f2_blocks, graph, numstart = numstart, return_fstats = opt_worst_residual, f3basepop = f3basepop, ...)
      res$score = res$worst_residual
      res$worst_residual = res$f4 = NULL
      res
      }
  } else {
    if('f3precomp' %in% names(ell)) {
      f3precomp = ell$f3precomp
    } else {
      f3precomp = qpgraph_precompute_f3(f2_blocks, get_leafnames(graph), f3basepop = f3basepop)
    }
    qpgfun = function(graph, ...) qpgraph(NULL, graph, numstart = numstart, f3precomp = f3precomp, ...)
  }
  stop_at = Sys.time() + stop_sec
  wfuns = namedList(rearrange_negadmix3, replace_admix_with_random)
  dfuns = namedList(rearrange_negdrift)
  allfuns = c(wfuns, dfuns, mutfuns)

  models = tibble(g = list(simplify_graph(graph))) %>%
    mutate(res = map(g, qpgfun), hash = map_chr(g, graph_hash), lasthash = graph_hash(graph)) %>%
    unnest_wider(res) %>%
    #mutate(edges = map(edges, ~filter(., weight < 0))) %>%
    transmute(gen2 = 0, hash, g, edges, score)
  best = besthist = models$score
  bestmut = 'none'
  gimp = numtraceback = 0
  st = data.tree::Node$new('R')
  st$AddChild(models$hash,
              gen2 = 0,
              score = models$score,
              imp = 0,
              scm_1 = NA,
              scm_5 = NA,
              cntm_1 = NA,
              cntm_5 = NA)
  stdat = st_to_dat(st)
  tm = Sys.time()
  nonzero_f4 = if(reject_f4z > 0) f4(f2_blocks) %>% filter(abs(z) > reject_f4z) else NULL
  nzf4 = admixc = eventc = lasttracebacklevel = NULL
  lasttracebackscore = Inf

  for(i in seq_len(stop_gen)) {

    if(best <= stop_score || gimp > stop_gen2 || !is.null(stop_sec) && Sys.time() > stop_at) break
    if(verbose) {
      sc = besthist[max(1,i-1)]
      alert = if(isTRUE(sc == best)) alert_success else alert_info
      #msg = paste0(i, ': sc ', round(besthist[max(1,i-1)], 3), '\tbest ', round(best, 3),'\tnew ', if(i==1) 1 else nrow(newmod), '\ttot ', sum(!is.na(models$score)), ' ', sum(stdat$totalCount == 1), ' ', sum(stdat$score[stdat$totalCount == 1] < best*2),'\t')
      msg = paste0(i, ': score ', round(besthist[max(1,i-1)], 3), '\tbest score ', round(best, 3),'\t')
      na = if(i == 1 || is.null(newmod) || nrow(newmod) == 0) numadmix(graph) else numadmix(newmod$g[[1]])
      if(numadmix != max_admix && i > 1) msg %<>% paste0('numadmix ', na, '\t')
      msg %<>% paste0(bestmut, '\ttot ', sum(!is.na(models$score)))
    }

    if(gimp >= traceback_gen) {
      if(is.null(lasttracebacklevel)) lasttracebacklevel = newgraph$level#-traceback_gen
      #if(is.null(lasttracebacklevel)) lasttracebacklevel = 2
      else lasttracebacklevel = lasttracebacklevel - 1
      numtraceback = numtraceback + 1
      #data.tree::Prune(st, function(x) x$level < lasttracebacklevel)
      if(lasttracebacklevel > 0) {
        st$Do(function(x) x$closed = x$level >= lasttracebacklevel)
        stdat = st %>% st_to_dat
        gimp = 0
        lasttracebackscore = stdat %>% filter(level <= lasttracebacklevel) %>% slice_min(score) %>% pluck('score', 1)
        if(is.null(lasttracebackscore)) lasttracebackscore = Inf
        if(verbose) alert_info(paste0('Traceback to level ', lasttracebacklevel, ' with score ', round(lasttracebackscore, 3), '\n'))
      }
    }
    #newgraph = pick_graph(stdat, verbose = verbose > 2)
    newgraph = stdat %>% filter(!isTRUE(closed)) %>% slice_min(score, with_ties = FALSE)
    if(nrow(newgraph) == 0) newgraph = stdat %>% slice_min(score, with_ties = FALSE)

    if(verbose > 1) {
      msg %<>% paste0(newgraph %$% paste0(' ', round(score, 2), ' ', round(score/best, 2),'\tg ',gen2, ' ', gimp, '\tcnt ',cntm_1,' ',cntm_5, '\ttime '), round(as.numeric(Sys.time()-tm), 1), '\t', bestmut)
      tm = Sys.time()
    }
    if(verbose) alert(paste0(msg, '\n'))
    sel = models %>% filter(hash == newgraph$hash[[1]])

    graph = sel$g[[1]]
    if(!is.null(outpop) && is.null(get_outpop(graph))) stop('fg error')

    if(is.null(nzf4) && reject_f4z > 0 && satisfies_nonzerof4(graph, nonzero_f4)) {
      nzf4 = nonzero_f4
      if(verbose) alert_info('All zero f4 ok!\n')
    }
    if(is.null(admixc) && !is.null(admix_constraints) && satisfies_numadmix(graph, admix_constraints)) {
      admixc = admix_constraints
      if(verbose) alert_info('Admix constraints ok!\n')
    }
    if(is.null(eventc) && !is.null(event_constraints) && satisfies_eventorder(graph, event_constraints)) {
      eventc = event_constraints
      if(verbose) alert_info('Event constraints ok!\n')
    }

    if(gimp > 0 && gimp %% plusminus_generations == 0) {

      sel = models %>% slice_min(score, with_ties = FALSE)
      graph = sel$g[[1]]
      tryCatch({
      newmod = eval_plusnadmix(graph, qpgfun, n = 1, ntry = numgraphs*10,
                               nonzero_f4 = nzf4, admix_constraints = admixc,
                               event_order = eventc, verbose = verbose)
      if(nrow(newmod) == 0) next
      newmod %<>% slice_min(score, with_ties = FALSE)
      }, error = function(e) stop('fg error 2'))
      mf = 'plusnadmix'
      excess_admix = numadmix(newmod$graph[[1]]) - max_admix
      if(excess_admix > 0) {
        mf = 'plusminusn'
        newmod = eval_minusnadmix(newmod$graph[[1]], qpgfun, n = excess_admix, ntry = numgraphs*10,
                                  nonzero_f4 = nzf4, admix_constraints = admixc,
                                  event_order = eventc, verbose = verbose)
        if(nrow(newmod) == 0) next
        newmod %<>% slice_min(score, with_ties = FALSE)
      }
      newmod %<>% mutate(hash = map_chr(graph, graph_hash)) %>%
        transmute(hash, lasthash = sel$hash[[1]], g = graph, gen2 = i, edges, score, mutfun = mf)
      if(newmod$score > min(models$score, na.rm=F)*0.95) {
        gimp = gimp + 1
        next
      }

    } else {

      e = qpgfun(graph, constrained = FALSE)$edges %>% filter(weight < 0)
      if(nrow(e) == 0) {
        newmod = tibble()
      } else {
        newmod = e %>% slice_min(weight, n = floor(numgraphs/2)) %>%
          mutate(mutfun = ifelse(type == 'admix', sample(names(wfuns), 1), sample(names(dfuns), 1)),
                 fun = allfuns[mutfun],
                 g = pmap(list(fun, from, to), function(x, y, z) possibly(x, NULL)(graph, y, z))) %>%
          rowwise %>% filter(!is.null(g)) %>% ungroup
      }
      remaining = numgraphs - nrow(newmod)

      if(remaining > 0) {
        swape = swap_negdrift(graph, e$from, e$to)
        if(!is.null(swape) && length(swape) > 0) {
          newswap = tibble(g = swape, mutfun = 'swap_negdrift') %>%
            slice_sample(n = min(length(swape), ceiling(remaining/2)))
          newmod %<>% bind_rows(newswap)
        }
        remaining = numgraphs - nrow(newmod)
      }
      if(remaining > 0) {

      tryCatch({
        randmut = tibble(mutfun = names(mutfuns), fun = map(mutfuns, ~possibly(., NULL))) %>%
          slice_sample(n = remaining, replace = TRUE) %>%
          rowwise %>% mutate(g = list(fun(graph))) %>% filter(!is.null(g)) %>% ungroup %>% select(-fun)
        newmod %<>% bind_rows(randmut)
        }, error = function(e) if(verbose) alert_warning("Could not apply mutation function"))
      }

      newmod %<>% rowwise %>% filter(satisfies_constraints(g, nzf4, admixc, eventc)) %>% ungroup
      newmod %<>%
        transmute(g, hash = map_chr(g, graph_hash), lasthash = sel$hash[[1]], mutfun)
      if(nodups) newmod %<>% filter(!duplicated(hash), !hash %in% models$hash)
      tryCatch({
      if(!is.null(outpop) && nrow(newmod) > 0) newmod %<>% rowwise %>% mutate(og = list(get_outpop(g))) %>%
        filter(!is.null(og) && og == outpop) %>% ungroup
      }, error = function(e) browser())
      if(nrow(newmod) == 0) {
        if(verbose) alert_danger('No new models!\n')
        next
      }
      newmod %<>%
        mutate(res = furrr::future_map(g, qpgfun, .options = furrr::furrr_options(seed = TRUE))) %>%
        unnest_wider(res) %>%
        transmute(gen2 = i, hash, lasthash, g, edges, score, mutfun) %>%
        arrange(score)
    }

    besthist[i] = newmod$score[1]
    bestmut = newmod$mutfun[1]
    if(besthist[i] < best) {
      best = besthist[i]
      gimp = 0
    } else gimp = gimp + 1
    if(nodups) newmod %<>% filter(!hash %in% models$hash)
    models %<>% bind_rows(newmod, .)

    for(j in seq_len(nrow(newmod))) {
      node = data.tree::FindNode(st, newmod$lasthash[j])
      node$AddChild(newmod$hash[j],
                    gen2 = i,
                    score = newmod$score[j],
                    imp = as.numeric(best != newmod$score[1]),
                    scm_1 = node$parent$score,
                    scm_5 = node$parent$parent$parent$parent$parent$score,
                    cntm_1 = node$parent$totalCount,
                    cntm_5 = node$parent$parent$parent$parent$parent$totalCount)
    }
    stdat = st %>% st_to_dat
  }

  mout = models %>% transmute(generation = gen2, graph = unname(g), edges, score, mutation = mutfun, hash, lasthash)
  if(return_searchtree) return(list(models = mout, tree = st, dat = stdat))
  mout
}

st_to_dat = function(st) {

  fp_inf = function(node) node$scbest = if(node$isLeaf) node$score else min(node$score, sapply(node$children, fp_inf), na.rm=T)
  fp_1 = function(node, n = 1) node$scp_1 = if(node$isLeaf || n == 0) node$score else min(map_dbl(node$children, function(node) if(is.null(node$score)) NA else node$score), na.rm=T)
  fp_5 = function(node, n = 5) node$scp_5 = if(node$isLeaf || n == 0) node$score else min(sapply(node$children, function(node) fp_5(node, n-1)), na.rm=T)
  fimp = function(node) node$gimp = if(is.null(node$imp) || is.na(node$imp) || node$imp == 0) 0 else node$imp + fimp(node$parent)

  st$Do(fp_inf)
  st$Do(fp_1)
  st$Do(fp_5)
  st$Do(fimp)

  as.data.frame(st, NULL, T, 'name', 'score', 'scbest', 'gimp', 'scp_1', 'scp_5', 'scm_1', 'scm_5', 'height', 'level', 'totalCount', 'cntm_1', 'cntm_5', 'gen2', 'closed') %>%
    as_tibble %>% rename(hash = name) %>% select(-1)
}

graph_potential = function(stdat) {

  stdat %>% pivot_longer(contains('_'), names_to = c('type', 'gen'), names_pattern = '([A-Za-z]+)_(\\d+)', values_to = 'v') %>% pivot_wider(names_from = type, values_from = v) %>% mutate(relscore = score/min(score, na.rm=T), relm = scm/score, relp = score/scp, relmcnt = relm/cntm, relpcnt = relp/totalCount) %>% pivot_wider(names_from = gen, values_from = c(scp, scm, cntm, relm, relp, relmcnt, relpcnt)) %>% mutate(potential = 1/(relscore * relmcnt_1 * relmcnt_5))

}


pick_graph = function(stdat, minprob = 0.5, cnt1 = 10, cnt5 = 30, pen1 = 1.2, pen5 = 2, gpen = 0.2, verbose = FALSE) {
  # selects new graph from search tree data and returns one row of stdat

  sel = stdat %>% slice_min(score, with_ties = FALSE)
  lastimp = max(stdat$gen2, na.rm = TRUE) - sel$gen2
  #if(sel$totalCount[[1]] == 1 || runif(1) < minprob) {
  if(sel$totalCount[[1]] == 1 || lastimp < 3 || runif(1) > minprob) {
    return(sel)
  }

  out = stdat %>% filter(totalCount == 1) %>%
    mutate(g1 = replace_na(ifelse(cntm_1 > cnt1, cntm_1/cnt1, 1), 1),
           g5 = replace_na(ifelse(cntm_5 > cnt5, cntm_5/cnt5, 1), 1),
           imp = replace_na((gimp+1)^gpen, 1),
           potential = 1000 / (score * g1 * g5 * imp), potential = replace_na(potential, 1/max(score, na.rm=T)))
  maxpot = max(out$potential)
  if(maxpot == Inf) stop('maxpot inf')
  out %<>% slice_sample(n = 1, weight_by = potential^10)
  if(verbose) alert_warning(out %$% paste0('\tcnt ', sum(stdat$totalCount == 1), '\tpen ', paste0(round(g1, 2), ' ', round(g5, 2), ' ', round(imp, 2), '\tpot ', round(potential, 2), ' ', round(maxpot, 2), '\n')))
  out
}

pick_graph2 = function(stdat, lasttracebacklevel, tracebackafter = 4, verbose = FALSE) {

  sel = stdat %>% slice_min(score, with_ties = FALSE)
  lastimp = max(stdat$gen2, na.rm = TRUE) - sel$gen2
  if(sel$totalCount[[1]] == 1 || lastimp < tracebackafter) {
    return(sel)
  }
  if(is.null(lasttracebacklevel)) lasttracebacklevel = max(stdat$level)
  sel = stdat %>% filter(level < lasttracebacklevel, height == 1) %>% slice_max(level) %>% slice_min(score, with_ties = FALSE)
  if(nrow(sel) == 0) stop('pick graph error')
  if(verbose) print(sel)
  sel
}

swap_negdrift = function(graph, from, to) {
  # from, to are vectors of edges to be swapped

  out = tibble(from, to) %>%
    expand_grid(rename(., from2 = from, to2 = to)) %>%
    filter(from != from2, to != to2, from != to2, to != from2) %>%
    rowwise %>%
    mutate(g = graph %>%
             delete_edges(paste(c(from, from2), c(to, to2), sep='|')) %>%
             add_edges(c(from, to2, from2, to)) %>% list)
  if(nrow(out) > 0) out %>% filter(is_valid(g)) %>% pull(g)
}


replace_admix_with_random = function(graph, from = NULL, to = NULL) {

  leaves = sort(get_leafnames(graph))
  g = graph %>% delete_admix(from, to)
  g %<>% insert_admix
  g
}

#' Get unique hash of an admixture graph
#'
#' This can be used to check if two graphs are identical. For two graphs to be identical, the topology and leaf nodes have to match, but internal node names do not matter.
#' @export
#' @param graph Admixture graph in igraph format
#' @return A hash string of the admixture graph
graph_hash = function(graph) {
  # graph identity is determined by topology and leaf names, but not by internal names

  #graph %>% unify_vertex_names() %>% as_edgelist %>% digest::digest()
  graph %<>% simplify_graph
  leaves = get_leafnames(graph) %>% sort
  internal = setdiff(names(V(graph)), leaves)
  graph %>%
    igraph::distances(internal, leaves, mode = 'out') %>%
    apply(1, function(x) paste(str_replace(as.character(x), 'Inf', '.'), collapse = '')) %>%
    unname %>% sort %>% c(leaves) %>% digest::digest()
}


edge_targets = function(graph) {

  leaves = graph %>% get_leafnames()
  nodes = names(V(graph))
  internal = setdiff(nodes, leaves)
  dst = graph %>% distances(nodes, leaves, mode = 'out')
  reachlist = map(nodes, ~names(which(is.finite(dst[.,])))) %>% set_names(nodes)
  graph %>%
    as_edgelist %>%
    as_tibble(.name_repair = ~c('from', 'to')) %>%
    add_count(to) %>%
    rowwise %>%
    mutate(targets = reachlist[to], tgt = paste0((leaves %in% targets)+0, collapse='')) %>%
    group_by(tgt) %>%
    mutate(identifiable = n > 1 | n() == 1) %>%
    ungroup

}


vs_to_es = function(vs) {
  # turns igraph vertex sequence into a vector of edge names
  if('igraph.vs' %in% class(vs)) nam = names(vs)
  else nam = vs
  paste0(head(nam, -1), '|', nam[-1])
}


path_intersections = function(graph) {

  pp = graph %>% pair_paths %>% rowwise %>% mutate(id = paste(sort(c(from, to)), collapse = ' ')) %>% ungroup
  expand_grid(pp, pp %>% rename_with(~paste0(., 2))) %>%
    filter(id != id2) %>%
    rowwise %>%
    mutate(is = list(intersect(path, path2)), islen = length(is), isstr = paste0(is, collapse = ' '),
           admtot = length(union(admnodes, admnodes2)), id4 = paste(sort(c(id, id2)), collapse = ' ')) %>%
    filter(islen > 0) %>%
    group_by(from, to, from2, to2) %>%
    mutate(uniq = list(names(which(table(unlist(is))==1)))) %>%
    rowwise %>%
    mutate(allunique = all(is %in% uniq)) %>%
    ungroup

}

# unidentifiable_admixture = function(graph) {
#
#   stopifnot(is_valid(graph) && is_simplified(graph))
#
#   leaves = graph %>% get_leafnames
#   adm = names(which(degree(graph, mode = 'in') > 1))
#   unresolved_adm = adm
#   pis = path_intersections(graph)
#
#   for(i in 1:10) {
#     if(length(unresolved_adm) == 0) break
#
#     pis %<>% rowwise %>%
#       mutate(unresolved = list(intersect(unresolved_adm, union(admnodes, admnodes2))), unum = length(unresolved)) %>%
#       ungroup
#
#     resolved0 = pis %>% filter(unum == 0, islen > 0) %>% pull(isstr) %>% unique
#     resolved1 = pis %>% filter(unum == 1, islen > 0, allunique) %>% pull(isstr) %>% unique
#
#     resolved_adm = pis %>% filter(isstr %in% intersect(resolved0, resolved1), unum == 1, allunique) %$%
#       unique(unlist(c(admnodes, admnodes2)))
#
#     if(length(resolved_adm) == 0) break
#
#     unresolved_adm %<>% setdiff(resolved_adm)
#   }
#   unresolved_adm
# }

paths_through = function(graph, leaves, node) {

  nl = graph %>% neighbors(node, mode = 'out') %>% pluck(1) %>% names
  nr = graph %>% neighbors(node, mode = 'out') %>% pluck(2) %>% names

  paths = all_simple_paths(graph, node, leaves, mode = 'out') %>%
    map(names)

  expand_grid(up = paths %>% keep(~.[[2]] == nl), down = paths %>% keep(~.[[2]] == nr)) %>%
    bind_rows(rename(., up=down, down=up))
}

pair_paths = function(graph) {

  leaves = graph %>% get_leafnames
  adm = names(which(degree(graph, mode = 'in') > 1))
  internal = setdiff(names(V(graph)), leaves)
  nonadm = setdiff(internal, adm)
  map(nonadm, ~paths_through(graph, leaves, .)) %>% set_names(nonadm) %>% bind_rows(.id = 'pivot') %>% rowwise %>% mutate(from = tail(up, 1), to = tail(down, 1), path = list(c(vs_to_es(up), vs_to_es(down))), admnodes = list(intersect(adm, c(up, down))), admnum = length(admnodes)) %>% ungroup

}

# continue here: use path intersections to determine edges which are not identifiable


tabulate_subgraphs = function(graphlist, pops) {
  # for a list of graphs and set of populations in that graph
  # returns data frame counting all subgraphs consisting of those populations

  #delete_leaves = function(graph, leaves) reduce(leaves, delete_leaf, .init = graph)
  graphlist %>%
    tibble(.name_repair = ~'graph') %>%
    rowwise %>%
    mutate(subgraph = list(keep_leaves(graph, pops)), hash = graph_hash(subgraph)) %>%
    ungroup %>%
    select(-graph) %>%
    add_count(hash) %>%
    filter(!duplicated(hash)) %>%
    arrange(-n)
}

tabulate_subgraphs_n = function(graphlist, pops, n, outpop = NULL, verbose = TRUE) {

  cmb = t(combn(setdiff(pops, outpop), n))
  if(verbose) alert_info(paste0('Tabulating ', nrow(cmb), ' combinations of ', n, ' populations...'))
  #pb = progress::progress_bar$new(total = nrow(cmb))
  as_tibble(cmb, .name_repair = ~paste0('X', seq_len(n))) %>%
    mutate(i = 1:n()) %>% rowwise %>%
    mutate(subgraphs = list(tabulate_subgraphs(graphlist, c(outpop, cmb[i,])))) %>%
    unnest(subgraphs) %>% arrange(-n)
}


count_admix = function(graph) {

  leaves = get_leafnames(graph)
  admnodes = names(which(degree(graph, mode = 'in') > 1))
  map_dbl(leaves, ~length(intersect(names(subcomponent(graph, .x, mode = 'in')), admnodes))) %>%
    set_names(leaves) %>% enframe('pop', 'nadmix') %>% arrange(pop)
}

unadmixed = function(graph) {

  graph %>% count_admix %>% filter(nadmix == 0) %>% pull(pop)
}


#' Assign proxy populations to admixed populations
#'
#' @export
#' @param graph An admixture graph in igraph format, or as edge list with column `weight`
#' @return A data frame with columns `pop`, `nadmix`, `proxy`, `nproxies` (and `weight`)
#' @examples
#' \dontrun{
#' summarize_proxies(example_igraph)
#' }
summarize_proxies = function(graph) {

  if('data.frame' %in% class(graph)) {
    edges = graph
    graph %<>% edges_to_igraph()
  } else {
    edges = NULL
  }
  leaves = get_leafnames(graph)
  admnodes = names(which(degree(graph, mode = 'in') > 1))
  admparents = map(admnodes, ~names(neighbors(graph, ., mode = 'in'))) %>% do.call(rbind, .)

  unadm_desc = function(graph, v, leaves) {
    all_simple_paths(graph, v, leaves, mode = 'out') %>%
      discard(~any(names(.)[-1] %in% admnodes)) %>%
      set_names(map_chr(., ~tail(names(.), 1))) %>% names %>% table %>%
      enframe %>% filter(value == 1) %>% pull(name)
  }
  vnam = names(V(graph))
  ud = map(vnam, ~unadm_desc(graph, ., leaves)) %>% set_names(vnam) #%>% keep(~length(.) > 0)

  # very inefficient; change this if function is slow
  find_keys = function(graph, v, stopv) {
    root = names(get_root(graph))
    for(n in names(all_simple_paths(graph, v, root, mode = 'in')[[1]])) {
      #if(stopv %in% names(subcomponent(graph, n, mode = 'out'))) return(NULL)
      if(length(ud[[n]]) > 0) return(ud[[n]])
      if(degree(graph, n, mode = 'in') > 1) {
        par = names(neighbors(graph, n, mode = 'in'))
        return(c(find_keys(graph, par[1], par[2]), find_keys(graph, par[2], par[1])))
      }
    }
  }
  admkeys = tibble(admnodes,
                   left = map(1:nrow(admparents), ~find_keys(graph, admparents[.,1], admparents[.,2])),
                   right = map(1:nrow(admparents), ~find_keys(graph, admparents[.,2], admparents[.,1]))) %>%
    rowwise %>%
    mutate(is = list(intersect(left, right)), left = list(setdiff(left, is)), right = list(setdiff(right, is)),
           left = list(ifelse(length(left) == 0, NA, left)), right = list(ifelse(length(right) == 0, NA, right))) %>%
    select(-is) %>% ungroup
  if(!is.null(edges)) {
    admkeys %<>% left_join(edges %>% filter(type == 'admix', !duplicated(to)) %>%
                             transmute(admnodes = to, wl = weight), by = 'admnodes') %>%
      mutate(wr = 1-wl)
  } else {
    admkeys %<>% mutate(wl = 0, wr = 0)
  }
  lastadm = map(leaves, ~intersect(names(subcomponent(graph, .x, mode = 'in')), admnodes)) %>%
    set_names(leaves) %>% discard(~length(.) == 0) %>% map(~head(., 1)) %>% unlist %>% enframe('pop', 'admnodes')

  proxies = lastadm %>%
    left_join(admkeys %>% transmute(admnodes, proxy = left, weight = wl), by = 'admnodes') %>%
    unnest(proxy) %>%
    bind_rows(lastadm %>%
                left_join(admkeys %>% transmute(admnodes, proxy = right, weight = wr), by = 'admnodes') %>%
                unnest(proxy)) %>%
    select(-admnodes) %>% add_count(pop, name = 'nproxies')
  if(is.null(edges)) proxies %<>% select(-weight)
  graph %>% count_admix %>% filter(nadmix > 0) %>% left_join(proxies, by = 'pop')
}

#' List proxy populations in graphlist
#'
#' @export
#' @param graphlist A list of admixture graphs
#' @return A data frame with columns `pop`, `proxy`, `nadmix`, `nproxies`, `n`, `frac`
#' @examples
#' \dontrun{
#' summarize_proxies_list(graphlist)
#' }
summarize_proxies_list = function(graphlist) {
  # need to update for list of fits

  graphlist %>% map(summarize_proxies) %>% bind_rows(.id = 'g') %>%
    group_by(pop) %>% add_count(proxy) %>%
    group_by(pop, proxy) %>%
    summarize(nadmix = mean(nadmix), nproxies = mean(nproxies), n = mean(n), .groups = 'drop') %>%
    mutate(frac = n/length(graphlist)) %>% arrange(-n) %>% ungroup
}



proxypred = function(sim, graphlist, auc = FALSE) {

  pops = get_leafnames(sim)

  out = expand_grid(pop = pops, proxy = pops) %>% filter(pop != proxy) %>%
    left_join(summarize_proxies(sim), by = c('pop', 'proxy')) %>%
    transmute(pop, proxy, obs = !is.na(nadmix)) %>%
    left_join(summarize_graphlist(graphlist), by = c('pop', 'proxy')) %>%
    transmute(pop, proxy, obs, pred = replace_na(frac, 0))
  if(auc) out = out %$% pROC::auc(obs, pred) %>% c
  out
}


node_events = function(graph, leaves, node) {
  # for a node, list associated population splits and merges

  children = names(neighbors(graph, node, mode = 'out'))
  if(length(children) == 0) {
    #out = tibble(type = 'leaf', pop1 = node, pop2 = NA)
    out = tibble()
  } else if(length(children) == 1) {
    # parents = neighbors(graph, node, mode = 'in')
    # desc1 = subcomponent(graph, parents[1], mode = 'out') %>% names %>% intersect(leaves)
    # desc2 = subcomponent(graph, parents[2], mode = 'out') %>% names %>% intersect(leaves)
    # out = expand_grid(type = 'merge', pop1 = setdiff(desc1, desc2), pop2 = setdiff(desc2, desc1))
    out = tibble()
  } else if(length(children) == 2) {
    desc1 = subcomponent(graph, children[1], mode = 'out') %>% names %>% intersect(leaves)
    desc2 = subcomponent(graph, children[2], mode = 'out') %>% names %>% intersect(leaves)
    out = expand_grid(type = 'split', pop1 = setdiff(desc1, desc2), pop2 = setdiff(desc2, desc1))
    if(children[1] %in% leaves) out %<>% bind_rows(tibble(type = 'leaf', pop1 = children[1], pop2 = NA))
    if(children[2] %in% leaves) out %<>% bind_rows(tibble(type = 'leaf', pop1 = children[2], pop2 = NA))
  } else {
    stop('More than two children!')
  }
  out
}


all_node_events = function(graph) {
  # for each node in a graph, list associated population splits and merges

  leaves = get_leafnames(graph)
  graph %>% V %>% names %>% set_names %>% map(~node_events(graph, leaves, .)) %>% bind_rows(.id = 'node')

}

#' List population split events in a graph
#'
#' @export
#' @param graph An admixture graph
#' @return A data frame with columns `earlier1`, `earlier2`, `later1`, `later2`
#' @examples
#' \dontrun{
#' summarize_eventorder(example_igraph)
#' }
summarize_eventorder = function(graph, unique_only = TRUE) {

  node_pairs = graph %>% igraph::distances(mode = 'out') %>% as_tibble(rownames = 'from') %>%
    pivot_longer(-from, names_to = 'to', values_to = 'order') %>%
    mutate(order = is.finite(order)+0) %>% filter(order > 0, from != to) %>%
    bind_rows(rename(., from = to, to = from) %>% mutate(order = -order))

  events = all_node_events(graph) %>%
    bind_rows(rename(., pop2 = pop1, pop1 = pop2) %>% filter(type != 'leaf'))

  out = expand_grid(rename_with(events, ~paste0(., '_1')), rename_with(events, ~paste0(., '_2'))) %>%
    filter(node_1 != node_2) %>% left_join(node_pairs, by = c('node_1'='from', 'node_2'='to')) %>%
    mutate(order = replace_na(order, 0))
  if(unique_only) {
    out %<>% group_by(type_1, pop1_1, pop2_1, type_2, pop1_2, pop2_2) %>%
      filter(all(order != -1) & any(order == 1)) %>% ungroup %>%
      transmute(earlier1 = pmin(pop1_1, pop2_1, na.rm=T),
                earlier2 = pmax(pop1_1, pop2_1),
                later1 = pmin(pop1_2, pop2_2, na.rm=T),
                later2 = pmax(pop1_2, pop2_2), type1 = type_1, type2 = type_2) %>% distinct
  }
  out
}

#' List population split events in a list of graphs
#'
#' @export
#' @param graphlist A list of admixture graphs
#' @return A data frame with columns `earlier1`, `earlier2`, `later1`, `later2`, `n`, `frac`
#' @examples
#' \dontrun{
#' summarize_eventorder(graphlist)
#' }
summarize_eventorder_list = function(graphlist) {

  tot = length(graphlist)
  graphlist %>% map(summarize_eventorder) %>% bind_rows(.id = 'g') %>%
    count(earlier1, earlier2, later1, later2) %>% mutate(frac = n/tot)
}


# unresolvable_params = function(groebner, vrs) {
#
#   resolved = c()
#   remaining = groebner %>% map(~intersect(vrs, unlist(map(., ~head(names(.), -1))))) %>% keep(~length(.) > 0)
#
#   repeat {
#   newresolved = remaining %>% keep(~length(.) == 1) %>% unlist %>% unique
#   resolved %<>% c(newresolved)
#   remaining %<>% keep(~length(.) > 1) %>% map(~setdiff(., resolved))
#   if(length(newresolved) == 0 || length(remaining) == 0) break
#   }
#   remaining
# }


root_paths = function(graph) {

  leaves = get_leafnames(graph)
  root = get_rootname(graph)
  adm = names(which(degree(graph, mode = 'in') > 1))
  parents = adm %>% rlang::set_names() %>% map(~names(neighbors(graph, ., mode = 'in')))
  admedges = parents %>% imap(~paste(.x, .y, sep = '|'))

  paths = all_simple_paths(graph, root, leaves, mode = 'out') %>% map(names) %>%
    tibble(.name_repair = ~'path') %>% rowwise %>%
    mutate(pop = tail(path, 1),
           edges = list(setdiff(vs_to_es(path), admedges)),
           admix = list(intersect(path, adm)),
           lr = list(map(admedges[admix], 1) %in% edges + 0),
           edges = list(setdiff(edges, unlist(admedges))),
           admedges = list(map2_chr(admedges[admix], 2-lr, ~.x[.y]))) %>%
    ungroup %>% add_count(pop)
  paths
}

path_pairs = function(graph) {

  # returns all pairs of paths from root to all leaves

  paths = root_paths(graph) %>% select(-admedges)
  expand_grid(rename_with(paths, ~paste0(.,'1')),
              rename_with(paths, ~paste0(.,'2'))) %>%
    filter(pop1 != pop2) %>% rowwise %>%
    mutate(edges = list(setdiff(union(edges1, edges2), intersect(edges1, edges2))),
           admix = list(c(admix1, admix2)), lr = list(c(lr1, lr2))) %>%
    ungroup
}

path_triples = function(graph) {

  paths = root_paths(graph) #%>% select(-edges)
  expand_grid(rename_with(paths, ~paste0(.,'1')),
              rename_with(paths, ~paste0(.,'2')),
              rename_with(paths, ~paste0(.,'3'))) %>%
    filter(pop1 != pop2, pop1 != pop3, pop2 != pop3) %>% rowwise %>%
    mutate(admix12 = list(c(admix1, admix2)),
           admix13 = list(c(admix1, admix3)),
           admix23 = list(c(admix2, admix3)),
           lr12 = list(c(lr1, lr2)),
           lr13 = list(c(lr1, lr3)),
           lr23 = list(c(lr2, lr3)),
           last12 = tail(intersect(path1, path2), 1),
           last13 = tail(intersect(path1, path3), 1),
           last23 = tail(intersect(path2, path3), 1),
           m121 = match(last12, path1),
           m131 = match(last13, path1),
           og = case_when(m121 < m131 ~ pop2,
                          m121 < match(last23, path2) ~ pop1,
                          m131 < match(last12, path1) ~ pop3)) %>%
    ungroup %>% select(-m121, m131)
}

#' Find well fitting admixture graphs
#'
#' This function generates and evaluates admixture graphs in `numgen` iterations
#' to find well fitting admixturegraphs.
#' @export
#' @param graph An admixture graph
#' @param substitute Should edge names be represented by shorter symbols?
#' @param nam Symbols used to shorten edge names
#' @return A list with two data frames: `equations` holds the equtions for all f2-statistics; `coding` has the mapping from edge names to edge symbols, which is used when `substitute = TRUE`
graph_equations = function(graph, substitute = TRUE, nam = c('a', 'e', 'f'), return_everything = FALSE) {

  pp = path_pairs(graph)
  leaves = get_leafnames(graph)
  e = pp$edges %>% unlist %>% unique
  a = names(which(degree(graph, mode = 'in') > 1))
  f = apply(combn(sort(leaves), 2), 2, paste, collapse = ' ')
  if(substitute) {
    avars = paste0(nam[1], seq_along(a))[seq_along(a)] %>% rlang::set_names(a)
    evars = paste0(nam[2], seq_along(e)) %>% rlang::set_names(e)
    fvars = paste0(nam[3], seq_len(choose(length(leaves), 2))) %>% rlang::set_names(f)
  } else {
    evars = e %>% paste0('`',. ,'`') %>% rlang::set_names(e)
    avars = (a %>% paste0('`',. ,'`'))[seq_along(a)] %>% rlang::set_names(a)
    fvars = f %>% paste0('`',. ,'`') %>% rlang::set_names(f)
  }
  coding = map2(list(avars, evars, fvars), nam, ~enframe(.x, 'edge', 'symbol') %>% mutate(type = .y)) %>% bind_rows

  eq = pp %>% filter(pop1 < pop2) %>%
    rowwise %>%
    mutate(mul = list(ifelse(lr == 0, avars[admix], paste0('(1 - ',avars[admix],')'))), add = list(paste0(evars[edges])),
           eq = list(paste0(paste0(mul, collapse = ' * '), ' * (', paste0(add, collapse = ' + '), ')') %>% str_replace_all('^ \\* ', '')),
           res = prod(ifelse(lr == 0, 1/4, 3/4)) * length(add),  resold = (1/4)^length(mul) * length(add),
           fvar = fvars[paste(pop1, pop2)]) %>%
    group_by(pop1, pop2) %>%
    summarize(equation = paste0(eq, collapse = ' + '), fvar = fvar[1], res = sum(res)) %>% ungroup %>%
    mutate(eq2 = paste0('(', equation, ')*2^10 - ', res*2^10)) %>% suppressMessages
  if(!return_everything) eq %<>% select(pop1, pop2, equation)

  list(equations = eq, coding = coding)
}

graph_to_function1 = function(graph, ge = NULL) {

  if(is.null(ge)) ge = graph_equations(graph)
  body1 = c('a = x[seq_len(na)]; e = x[(na+1):length(x)]; ')
  body2 = map_chr(ge$equations$equation, ~str_replace_all(., '([ae])([0-9]+)', '\\1\\[\\2\\]')) %>%
    paste(collapse = ', ') %>% paste('c(', ., ')')
  body = rlang::parse_expr(paste0('{', body1, body2, '}'))

  args = list(x = NULL, na = 0)
  eval(call("function", as.pairlist(args), body), env = parent.frame())
}

graph_to_function2 = function(graph, ge = NULL) {

  ge = graph_equations(graph)
  xx = map_chr(ge$equations$equation, ~str_replace_all(., '([ae])([0-9]+)', '\\1\\[\\2\\]'))
  Rcpp::cppFunction(paste0('NumericVector cppt(NumericVector x, int na) {x.push_front(0); NumericVector out(',length(ge$equations$equation),'); NumericVector a = x[Range(0,na)]; NumericVector e = x[Range(na-1,x.length())] = x[Range(na,x.length())]; ',paste0('out[', seq_along(xx)-1, '] = ', xx, collapse = '; '),'; return out;}'), plugins = 'cpp11')

}

graph_to_function3 = function(graph, ge = NULL) {

  if(is.null(ge)) ge = graph_equations(graph)
  na = numadmix(graph)
  ne = length(E(graph)) - 2*na

  pp = path_pairs(graph) %>% select(pop1, pop2, edges, admix, lr) %>% mutate(i = 1:n()) %>% filter(pop1 < pop2)
  pp2 = pp %>% select(-pop1, -pop2, -edges) %>% unnest(admix) %>% mutate(lr = pp %>% unnest(lr) %>% pull(lr))

  function(x, na) {

    a = x[seq_len(na)]
    e = x[(na+1):length(x)]

    adat = ge$coding %>% filter(type == 'a') %>% transmute(admix = edge, aval = x[seq_len(na)])
    edat = ge$coding %>% filter(type == 'e') %>% transmute(edges = edge, eval = x[(na+1):length(x)])
    admixvals = pp2 %>% left_join(adat, by = 'admix') %>%
      mutate(aval = ifelse(lr == 0, aval, 1-aval)) %>%
      group_by(i) %>% summarize(aval = prod(aval), .groups = 'drop')

    pp %>% left_join(admixvals, by = 'i') %>% mutate(aval = replace_na(aval, 1)) %>% unnest(edges) %>% left_join(edat, by = 'edges') %>% group_by(pop1, pop2) %>% summarize(f2pred = sum(aval * eval), .groups = 'drop') %>% ungroup %>% pull(f2pred)

  }
}

#' Make a function representing a graph
#'
#' This function takes an igraph object and turns it into a function that takes edge weights as input,
#' and outputs the expected f2-statistics.
#' @export
#' @param graph An admixture graph
#' @param admix_default The default weights for admixture edges
#' @param drift_default The default weights for drift edges
#' @param random_defaults Set default weights randomly for each edge between 0 and 1
#' @return A function mapping edge weights to f2-statistics
#' @examples
#' \dontrun{
#' mygraph = graph_f2_function(example_igraph)
#' mygraph(N3N8 = 0.1, `N2N1|Vindija.DG` = 0.4)
#' }
graph_f2_function = function(graph, admix_default = 0.5, drift_default = 0.01, random_defaults = FALSE) {

  ge = graph_equations(graph, substitute = FALSE)
  body1 = ge$equations$equation %>% paste(collapse = ', ') %>% paste('dat <- c(', ., '); ') %>% rlang::parse_expr()
  body2 = rlang::quo(d2 <- ge$coding %>% filter(type == 'f') %>% select(edge) %>% separate(edge, c('pop1', 'pop2'), ' '))
  body3 = rlang::expr(mutate(d2, f2 = dat))
  body = rlang::expr({!!body1; !!body2; !!body3})

  if(random_defaults) {
    na = ge$coding %>% filter(type == 'a') %>% nrow
    ne = ge$coding %>% filter(type == 'e') %>% nrow
    admix_default = runif(na)
    drift_default = runif(ne)
  }
  args = ge$coding %>% filter(type != 'f') %>%
    transmute(edge, val = ifelse(type == 'a', admix_default, drift_default)) %>%
    deframe %>% as.list
  rlang::eval_tidy(call("function", as.pairlist(args), body), env = parent.frame())
}



graph_jacobian = function(graph, ge = NULL, args = NULL) {

  if(is.null(ge)) ge = graph_equations(graph)
  if(is.null(args)) {
    na = numadmix(graph)
    ne = length(E(graph)) - 2*na
    args = c(runif(na), rep(1,ne))
  }
  symb = ge$coding %>% filter(type != 'f') %>% pull(symbol)

  jac = ge$equations$equation %>%
    map(~paste0('~', .) %>% as.formula %>% deriv(symb, symb) %>%
          exec(!!!args) %>% attr('gradient')) %>%
    do.call(rbind, .)
  rownames(jac) = paste(ge$equations$pop1, ge$equations$pop2)
  jac
}

graph_rank = function(graph) {

  jac = graph_jacobian(graph)
  qr(jac)$rank
}


#' Find all unidentifiable edges
#'
#' This function generates and evaluates admixture graphs in `numgen` iterations
#' to find well fitting admixturegraphs.
#' @export
#' @param graph An admixture graph
#' @return A data frame with all unidentifiable graph parameters
unidentifiable_edges = function(graph) {

  ge = graph_equations(graph)
  jac = graph_jacobian(graph, ge)

  dep = which(qr(jac)$rank == sapply(1:ncol(jac), function (i) qr(jac[,-i])$rank))
  adm = names(which(degree(graph, mode = 'in') > 1))
  parents = adm %>% rlang::set_names() %>% map(~names(neighbors(graph, ., mode = 'in')))
  out = ge$coding %>% slice(dep)
  oa = out %>% filter(type == 'a') %>% rename(to = edge) %>% rowwise %>%  mutate(from = parents[to]) %>%
    unnest(from) %>% select(from, to, type)
  oe = out %>% filter(type == 'e') %>% separate(edge, c('from', 'to'), '\\|') %>% select(-symbol)
  bind_rows(oa, oe) %>% mutate(type = ifelse(type == 'a', 'admix', 'edge'))
}

identifiable_sets = function(graph, jac, edge = NULL, n = 2) {

  rnk = qr(jac)$rank
  edges = colnames(jac)
  dep = which(rnk == sapply(1:ncol(jac), function (i) qr(jac[,-i])$rank))
  if(n == 1) return(t(t(edges[setdiff(1:ncol(jac), dep)])))
  if(n > length(dep)) return(NULL)
  if(is.null(edge)) cmb = combn(dep, n)
  else cmb = rbind(edge, combn(dep, n-1))
  #depn = which(sapply(1:ncol(cmb), function (i) qr(jac[,-cmb[,i]])$rank < rnk && all(map_lgl(seq_len(n), ~qr(jac[,-cmb[-.,i]])$rank == rnk))))
  depn = which(sapply(1:ncol(cmb), function (i) qr(jac[,-cmb[,i]])$rank < rnk))
  map(depn, ~edges[cmb[,.]]) %>% do.call('rbind', .)
}

identifiable_comb = function(graph, edge, jac = NULL, verbose = TRUE) {
  # returns the smallest sets in which an edge becomes identifiable

  stopifnot(edge %in% c(attr(E(graph), 'vnames'), names(V(graph))))
  if(is.null(jac)) jac = graph_jacobian(graph)
  edges = colnames(jac)

  for(i in seq_len(length(E(graph)))) {
    if(verbose) alert_info(paste0(i, '...\r'))
    us = identifiable_sets(graph, jac, edge = which(edges == edge), n = i)
    if(edge %in% unlist(us)) break
  }
  us[map_lgl(seq_len(nrow(us)), ~edge %in% us[.,]),,drop=FALSE]
}

predicted_f2 = function(graph, a = NULL, e = NULL) {

  ge = graph_equations(graph)
  fun = graph_to_function3(graph, ge)
  na = numadmix(graph)
  if(is.null(a)) a = runif(na)
  if(is.null(e)) e = rep(1e-2, length(E(graph))-2*length(a))
  ge$eq %>% select(pop1, pop2) %>% mutate(f2 = fun(c(a,e), na))
}

predicted_f4 = function(graph, a = NULL, e = NULL) {

  f2 = predicted_f2(graph, a = a, e = e) %>% bind_rows(rename(., pop1=pop2, pop2=pop1))
  pops = get_leafnames(graph)
  expand_grid(pop1 = pops, pop2 = pops, pop3 = pops, pop4 = pops) %>%
    filter(pop1 < pop2, pop1 < pop3, pop1 < pop4, pop3 < pop4, pop2 != pop3, pop2 != pop4) %>%
    left_join(f2 %>% rename(pop1 = pop1, pop3 = pop2, f13 = f2)) %>%
    left_join(f2 %>% rename(pop2 = pop1, pop4 = pop2, f24 = f2)) %>%
    left_join(f2 %>% rename(pop1 = pop1, pop4 = pop2, f14 = f2)) %>%
    left_join(f2 %>% rename(pop2 = pop1, pop3 = pop2, f23 = f2)) %>%
    mutate(f4 = (f14 + f23 - f13 - f24)/2) %>% ungroup %>% suppressMessages

}

graph_to_groebner = function(graph) {

  require(m2r)
  ge = graph_equations(graph, return_everything = TRUE)
  #vrs = eq %>% str_replace_all('[\\*\\+\\(\\)]', ' ') %>% str_replace_all('1-', '') %>% str_replace_all('-', '') %>%
  #  str_squish() %>% str_split(' ') %>% unlist %>% unique
  m2r::stop_m2()
  rng = m2r::ring_.(ge$coding$symbol, coefring = "QQ")

  out = m2r::gb_(ge$equations$eq2 %>% str_replace_all(' ', ''))
  m2r::stop_m2()
  out
}

find_invariants = function(graph, eps = 1e-7) {
  # find f4-statistics which are exactly 0

  pops = get_leafnames(graph)
  ge = graph_equations(graph)
  eq = ge$eq %>% transmute(p1 = pop1, p2 = pop2, eq = paste0('(', eq, ')')) %>%
    bind_rows(rename(., p1 = p2, p2 = p1))

  myenv = new.env()
  ge$coding %>% mutate(val = ifelse(type == 'e', 1, ifelse(type == 'a', runif(n()), NA))) %$%
    map2(symbol, val, ~assign(.x, .y, myenv)) %>% invisible

  expand_grid(pop1 = pops, pop2 = pops, pop3 = pops, pop4 = pops) %>%
    filter(pop1 < pop2, pop1 < pop3, pop1 < pop4, pop3 < pop4, pop2 != pop3, pop2 != pop4) %>%
    left_join(eq %>% rename(pop1 = p1, pop3 = p2, eq13 = eq)) %>%
    left_join(eq %>% rename(pop2 = p1, pop4 = p2, eq24 = eq)) %>%
    left_join(eq %>% rename(pop1 = p1, pop4 = p2, eq14 = eq)) %>%
    left_join(eq %>% rename(pop2 = p1, pop3 = p2, eq23 = eq)) %>%
    rowwise %>%
    mutate(f4 = paste(eq14, ' + ', eq23, ' - ', eq13, ' - ', eq24), res = eval(parse(text = f4), envir=myenv)) %>%
    ungroup %>%
    filter(between(res, -eps, eps)) %>% select(pop1:pop4) %>% suppressMessages
}


#' List clades in a graph
#'
#' @export
#' @param graph An admixture graph
#' @param eps Used to determine what counts as zero
#' @return A data frame with all (non-redundant) zero f4-statistics
#' @examples
#' \dontrun{
#' summarize_zerof4(example_igraph)
#' }
summarize_zerof4 = function(graph, eps = 1e-7) {

  pp = path_pairs(graph) %>% select(pop1, pop2, edges, admix, lr) %>% mutate(i = 1:n()) %>% filter(pop1 < pop2)
  pp2 = pp %>% select(-pop1, -pop2, -edges) %>% unnest(admix) %>% mutate(lr = pp %>% unnest(lr) %>% pull(lr))
  adm = names(which(degree(graph, mode = 'in') > 1))
  admval = runif(length(adm)) %>% set_names(adm)
  #ne = length(E(graph)) - 2*length(adm)
  driftval = runif(length(E(graph))) %>% set_names(attr(E(graph), 'vnames'))

  admixvals = pp2 %>%
    mutate(aval = ifelse(lr == 0, admval[admix], 1-admval[admix])) %>%
    group_by(i) %>% summarize(aval = prod(aval), .groups = 'drop')

  f2 = pp %>% left_join(admixvals, by = 'i') %>% mutate(aval = replace_na(aval, 1)) %>% unnest(edges) %>% mutate(eval = driftval[edges]) %>% group_by(pop1, pop2) %>% summarize(f2 = sum(aval * eval), .groups = 'drop') %>% ungroup %>% bind_rows(rename(., pop1 = pop2, pop2 = pop1))

  pops = get_leafnames(graph)
  expand_grid(pop1 = pops, pop2 = pops, pop3 = pops, pop4 = pops) %>%
    filter(pop1 < pop2, pop1 < pop3, pop1 < pop4, pop3 < pop4, pop2 != pop3, pop2 != pop4) %>%
    left_join(f2 %>% rename(pop1 = pop1, pop3 = pop2, f13 = f2)) %>%
    left_join(f2 %>% rename(pop2 = pop1, pop4 = pop2, f24 = f2)) %>%
    left_join(f2 %>% rename(pop1 = pop1, pop4 = pop2, f14 = f2)) %>%
    left_join(f2 %>% rename(pop2 = pop1, pop3 = pop2, f23 = f2)) %>%
    mutate(f4 = (f14 + f23 - f13 - f24)/2) %>% ungroup %>% suppressMessages %>%
    filter(between(f4, -eps, eps)) %>% select(pop1:pop4)
}

#' List clades in a list of graphs
#'
#' @export
#' @param graphlist A list of admixture graphs
#' @return A data frame with columns `pop1`, `pop2`, `pop3`, `pop4`, `n`, `frac`
#' @examples
#' \dontrun{
#' summarize_zerof4_list(graphlist)
#' }
summarize_zerof4_list = function(graphlist) {

  tot = length(graphlist)
  graphlist %>% map(summarize_zerof4) %>% bind_rows(.id = 'g') %>%
    count(pop1, pop2, pop3, pop4) %>% mutate(frac = n/tot)
}

normalize_zerof4 = function(zerof4) {
  zerof4[,1:4] %>% as_tibble(.name_repair = ~c('pop1', 'pop2', 'pop3', 'pop4')) %>%
    transmute(p1 = pmin(pop1, pop2),
              p2 = pmax(pop1, pop2),
              p3 = pmin(pop3, pop4),
              p4 = pmax(pop3, pop4)) %>%
    transmute(pop1 = if_else(p1 < p3, p1, p3),
              pop2 = if_else(p1 < p3, p2, p4),
              pop3 = if_else(p1 < p3, p3, p1),
              pop4 = if_else(p1 < p3, p4, p2)) %>% distinct
}

#' Test f4 constraints on a graph
#'
#' This function returns `TRUE` if and only if the admixture graph is compatible with
#' the f4-statistics listed in `nonzero_f4` being non-zero
#' @export
#' @param graph An admixture graph
#' @param nonzero_f4 A data frame or matrix with four columns. One row for each f4-statistic which is
#' expected to be non-zero
#' @return `TRUE` if all constraints are satisfied, else `FALSE`
#' @examples
#' \dontrun{
#' # Test whether f4(A,B; C,D) is expected to be non-zero in this graph:
#' nonzero_f4 = matrix(c('A', 'B', 'C', 'D'), 1)
#' satisfies_nonzerof4(random_admixturegraph(5, 2), nonzero_f4)
#' }
satisfies_nonzerof4 = function(graph, nonzero_f4) {

  if(is.null(nonzero_f4) || nrow(nonzero_f4) == 0) return(TRUE)
  unexpected_f4 = nonzero_f4 %>% normalize_zerof4 %>%
    inner_join(summarize_zerof4(graph)) %>% suppressMessages()
  nrow(unexpected_f4) == 0
}

#' Test f4 constraints on a graph
#'
#' This function returns `TRUE` if and only if the admixture graph is compatible with
#' the f4-statistics listed in `nonzero_f4` being non-zero
#' @export
#' @param graph An admixture graph
#' @param zero_f4 A data frame or matrix with four columns. One row for each f4-statistic which is
#' expected to be zero
#' @return `TRUE` if all constraints are satisfied, else `FALSE`
#' @examples
#' \dontrun{
#' # Test whether Chimp and Altai are a clade with respect to all populations X and Y:
#' # (whether f4("Chimp", "Altai"; X, Y) is 0 for all pairs of X and Y)
#' zero_f4 = expand_grid(pop1 = "Chimp", pop2 = "Altai", pop3 = X, pop4 = Y)
#' satisfies_zerof4(random_admixturegraph(5, 2), zero_f4)
#' }
satisfies_zerof4 = function(graph, zero_f4) {

  if(is.null(zero_f4) || nrow(zero_f4) == 0) return(TRUE)
  unexpected_f4 = zero_f4 %>% normalize_zerof4 %>%
    anti_join(summarize_zerof4(graph)) %>% suppressMessages()
  nrow(unexpected_f4) == 0
}



satisfies_oneevent = function(graph, earlier1, earlier2, later1, later2, type = 1) {

  root = get_rootname(graph)

  if(is.na(earlier2)) {
    earlier2 = names(neighbors(graph, earlier1, mode='in'))
    if(earlier2 == root) return(TRUE)
  }
  if(is.na(later2)) {
    later2 = names(neighbors(graph, later1, mode='in'))
    if(later2 == root) return(FALSE)
  }
  pe1 = all_simple_paths(graph, earlier1, root, mode = 'in')
  pe2 = all_simple_paths(graph, earlier2, root, mode = 'in')
  pl1 = all_simple_paths(graph, later1, root, mode = 'in')
  pl2 = all_simple_paths(graph, later2, root, mode = 'in')
  if(length(pe1) == 0 || length(pe2) == 0 || length(pl1) == 0 || length(pl2) == 0) stop('satisfies_oneevent error')

  ppis = expand_grid(expand_grid(pe1, pe2) %>% mutate(i = 1:n()),
                     expand_grid(pl1, pl2) %>% mutate(j = 1:n())) %>% rowwise %>%
    mutate(is1 = list(intersect(pe1, pe2)), is2 = list(intersect(pl1, pl2)),
           diff1 = list(setdiff(is1, is2)), diff2 = list(setdiff(is2, is1)),
           d1len = length(diff1), d2len = length(diff2), cont1 = is1[1] %in% is2) %>% ungroup %>%
    group_by(j) %>% mutate(maxdiff1len = max(d1len)) %>%
    group_by(i) %>% mutate(maxdiff2len = max(d2len)) %>% ungroup

  if(type == 1) {
    m10 = min(ppis$maxdiff1len) == 0
    m20 = min(ppis$maxdiff2len) == 0
    if(m10 && !m20) res = TRUE
    else if(m20 && !m10) res = FALSE
    else res = NA
  } else {
    res = all(ppis$cont1)
  }
  res
}

#' Test f4 constraints on a graph
#'
#' This function returns `TRUE` if and only if the admixture graph is compatible with
#' all event orders listed in eventorder
#' @export
#' @param graph An admixture graph
#' @param eventorder A data frame with columns `earlier1`, `earlier2`, `later1`, `later2`, optionally `type`
#' @param strict What to do in case some events are not determined by the graph.
#' If `strict = TRUE` (the default), the function will only return `TRUE` if there are no ambiguous constraints.
#' Otherwise, `TRUE` will be returned as long as no constraint is directly contradicted.
#' @return `TRUE` if all constraints are satisfied, else `FALSE`
#' @details Each row in `eventorder` represents a constraint that `earlier1` and `earlier2` split earlier than `later1` and `later2`. If `later2` is `NA`, `later2` will be set to the parent node of `later1`. By default (`type = 1`), a constraint will be satisfied as long as there is any lineage in which a split between `earlier1` and `earlier2` is ancestral to a split between `later1` and `later2`. `type = 2` is stricter and requires that the `earlier1`, `earlier2` split is ancestral to the `later1`, `later2` split in all lineages. In graphs with multiple admixture events there can be multiple splits between `earlier1`, `earlier2` and `later1`, `later2`, and many ways in which these splits can relate to each other. The current implementation only covers some of the many possible topological relationships.
#' @examples
#' \dontrun{
#' # Test whether the split between A and B is earlier than the split between C and D,
#' #   and whether the split between C and D is earlier than the terminal branch leading to E
#' constrain_events = tribble(
#'   ~earlier1, ~earlier2, ~later1, ~later2,
#'   'A', 'B', 'C', 'D',
#'   'C', 'D', 'E', NA)
#' satisfies_eventorder(random_admixturegraph(5, 0), eventorder = constrain_events)
#' }
satisfies_eventorder = function(graph, eventorder, strict = TRUE) {

  if(is.null(eventorder)) return(TRUE)
  if(!'type' %in% names(eventorder)) {
    eventorder$type = 1
  }
  status = eventorder %>% rowwise %>%
    mutate(ok = satisfies_oneevent(graph, earlier1, earlier2, later1, later2, type)) %>%
    ungroup %>% pull(ok)
  if(strict) return(isTRUE(all(status)))
  all(na.omit(status))
}

frac_eventorder = function(graph, eventorder, strict = TRUE) {

  if(is.null(eventorder)) return(TRUE)
  status = eventorder %>% rowwise %>%
    mutate(ok = satisfies_oneevent(graph, earlier1, earlier2, later1, later2)) %>%
    ungroup %>% pull(ok)
  if(strict) return(mean(!is.na(status)))
  mean(status, na.rm=T)
}


which_eventorder = function(graph, eventorder, strict = TRUE) {

  eventorder %>% rowwise %>%
    mutate(status = satisfies_oneevent(graph, earlier1, earlier2, later1, later2)) %>%
    ungroup
}



#' List number of admixture events for each population
#'
#' @export
#' @param graph An admixture graph
#' @return A data frame with columns `pop` and `nadmix`
#' @examples
#' \dontrun{
#' summarize_numadmix(example_igraph)
#' }
summarize_numadmix = function(graph) {
  # returns a data frame which lists the maximum number of admixture events for each population

  adm = names(which(degree(graph, mode = 'in') > 1))
  root = get_rootname(graph)
  graph %>% get_leafnames %>% set_names %>%
    map(~all_simple_paths(graph, root, ., mode = 'out') %>% map_dbl(~length(intersect(adm, names(.)))) %>% max) %>%
    unlist %>% enframe('pop', 'nadmix')
}

#' List number of admixture events for each population in a list of graphs
#'
#' @export
#' @param graphlist A list of admixture graphs
#' @return A data frame with columns `pop`, `mean`, `mean_nonzero`, `min`, `max`
#' @examples
#' \dontrun{
#' summarize_numadmix_list(graphlist)
#' }
summarize_numadmix_list = function(graphlist) {

  tot = length(graphlist)
  graphlist %>% map(summarize_numadmix) %>% bind_rows(.id = 'g') %>% group_by(pop) %>%
    summarize(mean = mean(nadmix), mean_nonzero = mean(nadmix != 0),
              min = min(nadmix), max = max(nadmix), .groups = 'drop')
}


#' Test admixture constraints on a graph
#'
#' This function returns `TRUE` if and only if the admixture graph satisfies all constraints on
#' the number of admixture events for the populations in `admix_constraints`
#' @export
#' @param graph An admixture graph
#' @param admix_constraints A data frame with columns `pop`, `min`, `max`
#' @return `TRUE` if all admixture constraints are satisfied, else `FALSE`
#' @examples
#' \dontrun{
#' # At least one admixture event for C, and none for D:
#' constrain_cd = tribble(
#'   ~pop, ~min, ~max,
#'   'C', 1, NA,
#'   'D', NA, 0)
#' satisfies_numadmix(random_admixturegraph(5, 2), constrain_cd)
#' }
satisfies_numadmix = function(graph, admix_constraints) {
  # admix_constraints is data frame with minimum and maximum admixture for each population

  if(is.null(admix_constraints)) return(TRUE)
  if(length(setdiff(c('min', 'max', 'pop'), names(admix_constraints))) > 0)
    stop("'admix_constraints' should have columns 'pop', 'min', 'max'!")
  unexpected_admix = admix_constraints %>%
    left_join(summarize_numadmix(graph), by = 'pop') %>%
    filter(nadmix < min | nadmix > max)
  nrow(unexpected_admix) == 0
}


#' Test constraints on a graph
#'
#' This function tests whether a graph satisfies a number of different types of constraints
#' @export
#' @param graph An admixture graph
#' @param nonzero_f4 A data frame or matrix with four columns. One row for each f4-statistic which is
#' observed to be significantly non-zero
#' @param admix_constraints A data frame with columns `pop`, `min`, `max`
#' @param event_order A data frame with columns `earlier1`, `earlier2`, `later1`, `later2`
#' @return `TRUE` if all admixture constraints are satisfied, else `FALSE`
#' @seealso \code{\link{satisfies_numadmix}}, \code{\link{satisfies_zerof4}}, \code{\link{satisfies_eventorder}}
#' \dontrun{
#' # At least one admixture event for C, and none for D:
#' constrain_cd = tibble(pop = c('C', 'D'), min = c(1, NA), max = c(NA, 0))
#' satisfies_numadmix(random_admixturegraph(5, 2), constrain_cd)
#' }
satisfies_constraints = function(graph, nonzero_f4 = NULL, admix_constraints = NULL, event_order = NULL) {

  satisfies_nonzerof4(graph, nonzero_f4) &&
    satisfies_numadmix(graph, admix_constraints) &&
    satisfies_eventorder(graph, event_order)
}


#' Test if a tree is part of a graph
#'
#' This function tests whether a tree is part of a graph. This is useful for testing whether a Y-chromosome tree is consistent with an autosomal admixture graph. Leaf node names matter, but internal node names are ignored.
#' @export
#' @param tree An admixture graph without admixture event
#' @param graph An admixture graph
#' @return `TRUE` if all admixture constraints are satisfied, else `FALSE`
#' \dontrun{
#' tree = graph_splittrees(example_igraph) %>% pull(graph) %>% pluck(1)
#' tree_in_graph(tree, example_igraph)
#' }
tree_in_graph = function(tree, graph) {
  # returns TRUE if tree is in graph

  treehash = graph_hash(tree)
  splithashes = graph_splittrees(graph) %>% rowwise %>%
    mutate(hash = graph_hash(graph)) %>% pull(hash)
  treehash %in% splithashes
}


is_clade = function(geno, pops1, pops2, i1=1, i2=1, perm=100) {

  g1 = geno[,pops1[-i1]]-geno[,pops1[i1]]
  g2 = geno[,pops2[-i2]]-geno[,pops2[i2]]
  #cancor(g1, g2)$Summary
  obs = cancor(g1, g2)$cor[1]
  expected = c()
  for(i in seq_len(perm)) {
    expected[i] = cancor(g1, g2[sample(seq_len(nrow(g2))),])$cor[1]
  }
  pmax(1/perm, mean(expected > obs))
}


condense_graph = function(graph) {
  # groups pops by shared admixture, return graph of those groups
  # should be redone in a nicer way

  leaves = get_leafnames(graph)
  adm = names(which(degree(graph, mode = 'in') > 1))
  root = get_rootname(graph)
  #dist = igraph::distances(graph, mode = 'out') %>% as_tibble(rownames = 'from') %>%
  #  pivot_longer(-from, names_to = 'to', values_to = 'dist') %>% filter(is.finite(dist), dist > 0)


  grps = delete_vertices(graph, adm) %>% components %$% membership %>%
    split(., .) %>% map(names) %>% map(~intersect(., c(leaves, adm))) %>% compact
  nam = map_chr(grps, ~shortest_paths(graph, .[1], root, mode = 'in') %$%
                  vpath %>% pluck(1) %>% names %>% intersect(c(adm, root)) %>% `[`(1))
  names(grps) = nam
  nam2 = map_chr(grps, ~paste(sort(.), collapse=' '))
  #sg = adm %>% intersect(nam) %>% set_names %>% map(~neighbors(graph, ., mode = 'in') %>% names %>% map_chr(~shortest_paths(graph, ., root, mode = 'in') %$% vpath %>% pluck(1) %>% names %>% intersect(nam) %>% `[`(1)) %>% unique) %>% enframe('to', 'from') %>% select(2:1) %>% unnest(from)
  # sg = adm %>% set_names %>% map(~neighbors(graph, ., mode = 'in') %>% names %>% map_chr(~shortest_paths(graph, ., root, mode = 'in') %$% vpath %>% pluck(1) %>% names %>% intersect(nam) %>% `[`(1)) %>% unique) %>% enframe('to0', 'from0') %>% select(2:1) %>% unnest(from0)

  # nam2 %<>% c(setdiff(adm, names(grps)) %>% set_names %>% map_chr(~shortest_paths(graph, ., leaves, mode = 'out')$vpath %>% suppressWarnings %>% compact %>% pluck(1) %>% names %>% intersect(nam2) %>% `[`(1)))
  # sg %>% mutate(from = nam2[from0], to = nam2[to0]) %>% distinct(from, to)

  sgi = distances(graph, nam, nam, mode = 'out') %>% igraph::graph_from_adjacency_matrix() %>% igraph::simplify()
  while(!all(names(V(sgi)) %in% nam)) {
    node = setdiff(names(V(sgi)), nam)[1]
    parents = sgi %>% neighbors(node, mode = 'in') %>% names
    children = sgi %>% neighbors(node, mode = 'out') %>% names
    newedges = expand_grid(a=parents, b=children) %>% as.matrix %>% t %>% c
    sgi %<>% delete_vertices(node) %>% add_edges(newedges)
  }
  sgi %>% as_edgelist() %>% as_tibble(.name_repair = ~c('from0', 'to0')) %>% mutate(from = nam2[from0], to = nam2[to0]) %>% distinct(from, to)
}

pair_admix = function(graph) {

  graph %>% condense_graph %>% edges_to_igraph() %>% igraph::distances(mode = 'out') %>%
    igraph::graph_from_adjacency_matrix() %>% igraph::simplify() %>%
    as_edgelist %>% as_tibble(.name_repair = ~c('from', 'to')) %>% rowwise %>%
    mutate(from = str_split(from, ' '), to = str_split(to, ' ')) %>% unnest(from) %>% unnest(to) %>%
    transmute(from, to, dir = 1) %>%
    bind_rows(rename(., from = to, to = from) %>% mutate(dir = -dir)) %>%
    complete(from, to, fill = list(dir = 0)) %>% filter(from < to) %>% arrange(from, to)
}


triplet_proportions = function(fit) {

  graph = fit %>% edges_to_igraph()
  triples = path_triples(graph)
  adm = fit %>% filter(type == 'admix') %>%
    transmute(e = paste0(from, '|', to), weight) %>% deframe
  prop = triples %>% rowwise %>%
    mutate(w1 = prod(adm[admedges1]),
           w2 = prod(adm[admedges2]),
           w3 = prod(adm[admedges3]), w = w1*w2*w3) %>%
    group_by(pop1, pop2, pop3, og) %>% summarize(sm = sum(w)) %>% ungroup %>% suppressMessages()
  x = prop %>% select(1:3) %>% distinct
  miss = bind_rows(x %>% mutate(og = pop1), x %>% mutate(og = pop2), x %>% mutate(og = pop3)) %>%
    mutate(sm = 0) %>% anti_join(prop, by = c('pop1', 'pop2', 'pop3', 'og'))
  prop %>% bind_rows(miss) %>% arrange(pop1, pop2, pop3) %>% rename(outgroup = og, proportion = sm)
}


tree_triplets = function(tree) {
  # returns the population triplets that make up a tree
  # currently too slow to run on many trees

  leaves = get_leafnames(tree)
  root = get_rootname(tree)
  igraph::distances(tree, V(tree), leaves, mode = 'out') %>% as_tibble(rownames = 'from') %>% pivot_longer(-from, names_to = 'to', values_to = 'dist') %>% filter(!from %in% c(leaves, root)) %>% group_by(from) %>% summarize(l = list(to[is.finite(dist)]), out = list(to[!is.finite(dist)])) %>% rowwise %>% mutate(l = list(t(combn(l, 2)) %>% as_tibble)) %>% unnest(l) %>% unnest(out) %>% transmute(out, X1 = pmin(V1, V2), X2 = pmax(V1, V2)) %>% distinct

}

tree_isoutgroup = function(tree, outgroup, pop1, pop2) {

  root = get_rootname(tree)
  p1 = shortest_paths(tree, root, pop1)$vpath[[1]]
  p2 = shortest_paths(tree, root, pop2)$vpath[[1]]
  og = shortest_paths(tree, root, outgroup)$vpath[[1]]
  length(intersect(p1, og)) < length(intersect(p1, p2))
}



place_root = function(graph, from, to, outpop = NULL) {
  # places root at edge from -> to
  # may reduce number of admixture edges

  root = get_rootname(graph)
  children = neighbors(graph, root, mode = 'out') %>% names
  oldleaves = get_leafnames(graph)
  newroot = newnodenam('root', names(V(graph)))
  newg = graph %>% delete_vertices(root) %>% add_edges(c(children[1], children[2])) %>%
    add_vertices(1, name = newroot) %>% delete_edges(paste0(from, '|', to)) %>%
    add_edges(c(newroot, from, newroot, to)) %>%
    igraph::as.undirected(mode = 'collapse')
  dist = igraph::distances(newg, newroot)
  out = newg %>% as_edgelist %>% as_tibble(.name_repair = ~c('v1', 'v2')) %>%
    rowwise %>%
    mutate(from = ifelse(dist[,v1] <= dist[,v2], v1, v2), to = ifelse(from == v1, v2, v1)) %>%
    ungroup %>% select(from, to) %>% edges_to_igraph()
  count = 0
  newleaves =  setdiff(get_leafnames(out), oldleaves)
  while(length(newleaves) > 0) {
    g = out %>% igraph::delete_vertices(newleaves) %>% simplify_graph()
    newleaves = setdiff(get_leafnames(g), oldleaves)
    out = g
  }
  out
}

#' Modify a graph by changing the position of the root node
#'
#' @export
#' @param graph An admixture graph
#' @param fix_outgroup Keep outgroup in place
#' @return A new admixture graph
place_root_random = function(graph, fix_outgroup = TRUE) {

  stopifnot(is_simplified(graph) && is_valid(graph))
  outpop = get_outpop(graph)
  if(fix_outgroup && !is.null(outpop)) {
    graph %<>% delete_vertices(c(outpop, get_rootname(graph)))
  }
  randedge = graph %>% delete_vertices(c(get_rootname(.), get_leafnames(.))) %>%
    E %>% attr('vnames') %>% sample(1) %>% str_split('\\|') %>% pluck(1)
  graph %<>% place_root(randedge[1], randedge[2])
  if(fix_outgroup && !is.null(outpop)) {
    newroot = newnodenam('root', names(V(graph)))
    oldroot = get_rootname(graph)
    graph %<>% add_vertices(2, name = c(newroot, outpop)) %>% add_edges(c(newroot, outpop, newroot, oldroot))
  }
  graph
}



adjlist_find_paths = function(a, n, m, path = c()) {
  # Find paths from node index n to m using adjacency list a.
  path = c(path, n)
  if(n == m) return(list(path))
  paths = list()
  for(child in a[[n]]) {
    child = as.numeric(child)
    if (!child %in% path) {
      child_paths = adjlist_find_paths(a, child, m, path)
      for(child_path in child_paths) {
        paths = c(paths, list(child_path))
      }
    }
  }
  paths
}

paths_from_to = function(graph, from, to) {
  # Find paths in graph from vertex source to vertex dest
  adj = get.adjlist(graph, mode = 'out')
  nam = names(V(graph))
  adjlist_find_paths(adj, which(nam == from), which(nam == to)) %>% map(~nam[.])
}

all_paths = function(graph) {
  # returns all paths from root to leaves; list with one list for each leaf node
  root = get_rootname(graph)
  leaves = get_leafnames(graph)
  leaves %>% set_names %>% map(~paths_from_to(graph, root, .))
}


pair_percentages = function(graph) {
  # returns how often each population ordered pair of populations is observed among the internal node descendants

  leaves = get_leafnames(graph)
  internal = setdiff(names(V(graph)), leaves)
  dest = graph %>% distances(internal, leaves, mode = 'out') %>% as_tibble(rownames = 'from') %>%
    pivot_longer(-from, names_to = 'to', values_to = 'order') %>% filter(is.finite(order)) %>% select(-order)
  single = dest %>% count(to)
  dest %>% full_join(dest, by = 'from') %>% count(to.x, to.y) %>%
    left_join(single %>% transmute(to.x = to, n1 = n)) %>%
    left_join(single %>% transmute(to.y = to, n2 = n)) %>%
    mutate(frac1 = n/n1, frac2 = n/n2) %>% arrange(to.x, to.y) %>% suppressMessages()
}

graph_distance = function(graph1, graph2) {

  x1 = pair_percentages(graph1)
  x2 = pair_percentages(graph2)
  mean((c(x1$frac1, x1$frac2) - c(x2$frac1, x2$frac2))^2)
}

#' Pairwise distance estimates for graphs
#'
#' Computes a distance estimate for each graph pair. Each graph is first summarized as a vector which counts for every leaf pair how many internal nodes reach that pair. The distance between two graphs is the Euclidean distance between the vectors of two graphs, and is scaled to fall between 0 and 1.
#' @param graphlist List of graphs
#' @return A data frame with graph distances
graph_distances = function(graphlist) {

  numgraphs = length(graphlist)
  pp = map(graphlist, pair_percentages) %>% map(~c(.$frac1, .$frac2))
  expand_grid(graph1 = seq_len(numgraphs), graph2 = seq_len(numgraphs)) %>% filter(graph1 < graph2) %>%
    rowwise %>% mutate(dist = mean((pp[[graph1]] - pp[[graph2]])^2)) %>% ungroup
}


consistent_with_qpadm = function(graph, left, right, target) {

  pops = get_leafnames(graph)
  internal = setdiff(names(V(graph)), pops)
  graph %>% distances(internal, pops, mode = 'out') %>% as_tibble(rownames = 'from') %>%
    pivot_longer(-from, names_to = 'to', values_to = 'order') %>% filter(is.finite(order)) %>% select(-order) %>% mutate(type = case_when(to %in% left ~ 'left', to %in% right ~ 'right', to %in% target ~ 'target')) %>% group_by(from) %>% filter('right' %in% type & 'target' %in% type & !'left' %in% type) %>% nrow %>% equals(0)

}

#' Partition a list of populations into left and right populations
#'
#' @export
#' @param pops A vector of populations
#' @param allpops Return only models which use all provided populations
#' @param more_right Return only models with more right than left populations
#' @return A data frame with one qpadm model per row
#' @seealso \code{\link{qpadm_models}}, \code{\link{graph_f2_function}}
#' @examples
#' \dontrun{
#' graph_to_qpadm(get_leafnames(example_igraph))
#' }
qpadm_models = function(pops, allpops = TRUE, more_right = TRUE) {

  models = tibble(l = power_set(pops))
  if(!is.logical(allpops)) stop('"allpops" should be TRUE or FALSE!')
  if(allpops) {
    models %<>% rowwise %>% mutate(r = list(setdiff(pops, l)))
  } else {
    models %<>% rowwise %>% mutate(r = list(power_set(setdiff(pops, l)))) %>% unnest(r)
  }
  models %<>% rowwise %>% filter(length(r) > 0) %>% ungroup
  if(more_right) models %<>% rowwise %>% filter(length(r) > length(l)) %>% ungroup
  models
}


#' Get all qpadm models for a graph
#'
#' This function tests which qpadm models should be valid for an admixture graph and a target population. By default, all models returned by \code{\link{qpadm_models}} are tested. For large graphs this will be too slow, and you may want test only some models by providing the `models` argument, or only a single model by providing the `left` and `right` arguments.
#' @details Two validity criteria are tested for each qpadm model: Rank validity and weight validity. Rank validity means that the rank of the f4 statistic matrix for only left and right populations is the same as the rank of the f4 statistic matrix that includes the target population among the left populations. Weight validity means that the estimated admixture weights for all left populations are between 0 and 1.
#' @export
#' @param graph An admixture graph
#' @param target Name of the target population.
#' @param left Left populations (provide this optionally if you want to test only a single qpadm model)
#' @param right Right populations (provide this optionally if you want to test only a single qpadm model)
#' @param models A two column nested data frame with models to be evaluated, one model per row. The first column, `l`, should contain the left populations, the second column, `r`, should contain the right populations. The target population is provided separately in the `target` argument.
#' @param weights Set this to `FALSE` to return only information on the ranks, not the weights, of each qpadm model. The ranks should depend only on the graph topology, while the weights and weight-validity (all weights for left populations between 0 and 1) can depend on the branch lengths of the graph. By default f4-statistics are based on equal branch lengths and admixture weights of 0.5. This can be overridden by providing `f4dat`.
#' @param f4dat A data frame of f4-statistics which can be provided to override the default branch lengths.
#' @param allpops Evaluate only models which use all populations in the admixture graph. See \code{\link{qpadm_models}}
#' @param return_f4 Include f4 statistic matrices in the results (default `FALSE`)
#' @param eps Epsilon value close to zero which is used for determining which f4 matrix elements should be considered non-zero, and which weights are strictly between 0 and 1.
#' @return A data frame with one qpadm model per row and columns `valid_rank` and `valid_weights` indicating whether a model should be valid under the graph.
#' @note An earlier version of this function tried to use the graph topology for identifying valid qpadm models, but this didn't work reliably. Christian Huber had the idea of using the ranks of expected f4 statistic matrices instead.
#' @seealso \code{\link{qpadm_models}}, \code{\link{graph_f2_function}}
#' @examples
#' \dontrun{
#' graph2 = example_igraph %>% simplify_graph() %>%
#'   delete_admix('N3N0', 'N3N4') %>% delete_admix('N3N1', 'N3N8')
#' graph_to_qpadm(graph2, 'Mbuti.DG') %>% filter(valid_rank, valid_weights)
#' }
graph_to_qpadm = function(graph, target, left = NULL, right = NULL, models = NULL,
                          weights = TRUE, f4dat = NULL, allpops = TRUE, more_right = TRUE, return_f4 = FALSE, eps = 1e-10) {

  if(is.null(models)) {
    if(!is.null(left) && !is.null(right)) {
      models = tibble(l = list(left), r = list(right))
    } else {
      models = get_leafnames(graph) %>% setdiff(target) %>%
        qpadm_models(allpops = allpops, more_right = more_right)
    }
  }
  if(is.null(f4dat)) f4dat = graph_f2_function(graph, random_defaults=FALSE)() %>% f2dat_f4dat()
  out = models %>% rowwise %>%
    mutate(target,
           m1 = list(f4dat_f4mat(f4dat, l, r, eps = eps)),
           m2 = list(f4dat_f4mat(f4dat, c(target, l), r, eps = eps)),
           rank1 = qr(m1)$rank,
           rank2 = qr(m2)$rank,
           fullrank = length(l)-1 == rank1,
           stablerank = rank1 == rank2)
  if(weights) {
    out %<>%
      mutate(w = list(cpp_qpadm_weights(m2, diag(prod(dim(m2))), rnk = min(dim(m2)), qpsolve=qpsolve)$weights),
             wmin = min(w), wmax = max(w), targetadmixed = isTRUE(wmin > 0+eps & wmax < 1-eps),
             valid = fullrank & stablerank & targetadmixed) #%>% select(-w)
  }
  if(!return_f4) out %<>% select(-m1, -m2)
  out %>% ungroup
}

qpadm_to_graphs = function(target, left, right, nadmix=0, ntry = 100) {

  pops = c(target, left, right)
  graphs = rerun(ntry, random_admixturegraph(as.numeric(length(pops)), nadmix))
  map(graphs, ~graph_to_qpadm(.x, target, left, right)) %>%
    bind_rows() %>% mutate(graph=graphs) %>% select(-c(l,r,target))
}


f4dat_f4mat = function(f4dat, left, right, eps = NA) {

  mat = f4dat %>%
    filter(pop1 == left[1], pop2 %in% left[-1], pop3 == right[1], pop4 %in% right[-1]) %>%
    select(-pop1,-pop3) %>% pivot_wider(pop2, names_from='pop4', values_from='f4') %>%
    select(-pop2) %>% as.matrix
  mat[abs(mat) < eps] = 0
  mat
}

consistent_with_qpwave = function(graph, left, right) {

  edges = graph %>% delete_vertices(pops) %>% {attr(E(.), 'vnames')}

  out = tibble()
  for(i in 1:3) {
    ecomb = t(combn(edges, i))
    new = map(seq_len(nrow(ecomb)), ~{
      comp = components(delete_edges(graph, ecomb[.,]))
      if(comp$no == 2 && min(comp$csize) > 1) comp$membership %>% enframe %>%
        filter(name %in% pops) %$% split(name, value) %>% enframe %>% rowwise %>% mutate(len = length(value))
    }) %>% bind_rows(.id = 'comp') %>% group_by(comp) %>% filter(min(len) > 1) %>% select(-len) %>% pivot_wider(names_from = name, values_from = value) %>% ungroup %>% transmute(num = i, l = `1`, r = `2`)
    out %<>% bind_rows(new)
  }

}

#' Simulate allele frequncies under an admixture graph
#'
#' This function performs a very crude simulation of allele frequencies
#' under an admixture graph model
#' @export
#' @param graph An admixture graph as igraph object, or as edge list data frame
#' with a column `weight`, as returned by `qpgraph()$edges`
#' @param nsnps Number of SNPs to simulate
#' @param drift_default Default branch lengths. Ignored if `graph` is a data frame with weights
#' @param admix_default Default admixture weights. Ignored if `graph` is a data frame with weights
#' @param leaves_only Return allele frequencies for leaf nodes only
#' @return A data frame with allele frequencies
#' @seealso \code{\link{graph_to_pcs}}
#' @examples
#' \dontrun{
#' afs = graph_to_afs(example_igraph)
#' }
graph_to_afs = function(graph, nsnps = 1e4, drift_default=0.02, admix_default=0.5, leaves_only = FALSE) {

  if(is.data.frame(graph)) {
    weights = graph
    graph = graph %>% select(from, to) %>% edges_to_igraph
  } else {
    weights = graph %>% igraph::as_edgelist() %>%
      as_tibble(.name_repair = ~c('from', 'to')) %>% add_count(to) %>%
      mutate(weight = ifelse(n > 1, admix_default, drift_default)) %>% select(-n)
  }

  rootaf = runif(nsnps)
  nodes = names(igraph::V(graph))
  afs = list()
  aw = function(graph, node) {
    parents = names(igraph::neighbors(graph, node, mode = 'in'))
    children = names(igraph::neighbors(graph, node, mode = 'out'))
    isadmix = length(parents) > 1
    if(!isadmix) {
      if(length(parents) == 0) {
        afs[[node]] <<- rootaf
      } else {
        w = weights %>% filter(from == parents, to == node) %>% pluck('weight', 1)
        afs[[node]] <<- pmin(1,pmax(0,afs[[parents]] + w*runif(nsnps)))
      }
      for(child in children) {
        aw(graph, child)
      }
    } else {
      if(!is.null(afs[[parents[1]]]) && !is.null(afs[[parents[2]]])) {
        w1 = weights %>% filter(from == parents[1], to == node) %>% pluck('weight', 1)
        w2 = weights %>% filter(from == parents[2], to == node) %>% pluck('weight', 1)
        afs[[node]] <<- afs[[parents[1]]]*w1 + afs[[parents[2]]]*w2
        for(child in children) {
          aw(graph, child)
        }
      }
    }
  }
  aw(graph, get_rootname(graph))
  out = afs %>% bind_cols
  if(leaves_only) out = out[get_leafnames(graph)]
  out
}

#' Simulate PCs under an admixture graph
#'
#' This function simulates PCA of allele frequencies under an admixture graph model
#' @export
#' @param graph An admixture graph as igraph object, or as edge list data frame
#' with a column `weight`, as returned by `qpgraph()$edges`
#' @param nsnps Number of SNPs to simulate
#' @param drift_default Default branch lengths. Ignored if `graph` is a data frame with weights
#' @param admix_default Default admixture weights. Ignored if `graph` is a data frame with weights
#' @param leaves_only Return PCs for leaf nodes only
#' @return A data frame with PCs for each population
#' @seealso \code{\link{graph_to_afs}}
#' @examples
#' \dontrun{
#' pcs = graph_to_pcs(example_igraph)
#' pcs %>% ggplot(aes(PC1, PC2, label = pop)) + geom_text() + geom_point()
#' }
graph_to_pcs = function(graph, nsnps = 1e4, drift_default=0.02, admix_default=0.5, leaves_only = TRUE) {

  afs = graph_to_afs(graph, nsnps = nsnps, drift_default = drift_default,
                     admix_default = admix_default, leaves_only = leaves_only)
  prcomp(t(as.matrix(afs)))$x %>% as_tibble(rownames = 'pop')
}


#' List leaf nodes for all internal nodes
#'
#' @export
#' @param graph An admixture graphs
#' @return A data frame with columns `from`, `to`, `id`
#' @examples
#' \dontrun{
#' summarize_descendants(graph)
#' }
summarize_descendants = function(graph) {

  leaves = get_leafnames(graph)
  internal = setdiff(names(V(graph)), leaves)
  graph %>% distances(internal, leaves, mode = 'out') %>% as_tibble(rownames = 'from') %>%
    pivot_longer(-from, names_to = 'to', values_to = 'order') %>% filter(is.finite(order)) %>%
    select(-order) %>% group_by(from) %>% mutate(id = paste(sort(to), collapse = ' ')) %>% ungroup
}

#' List leaf nodes for all internal nodes in a list of graphs
#'
#' @export
#' @param graphlist A list of admixture graphs
#' @param rename If `FALSE` (default), the output will be a data frame indicating how often each node in each graph is observed in all other graphs. If `TRUE`, the output will be a list, where the inner nodes will be renamed to have these percentages as part of their name. \code{\link{plot_graph}} will print the percentages of graphs renamed in this way.
#' @return A data frame with columns `graph`, `from`, `n`, `frac`
#' @examples
#' \dontrun{
#' summarize_descendants_list(graphlist)
#' }
summarize_descendants_list = function(graphlist, rename = FALSE) {

  ngraphs = length(graphlist)
  dat = graphlist %>% map(summarize_descendants) %>% bind_rows(.id = 'graph') %>% select(-to) %>% distinct %>%
    mutate(graph = as.numeric(graph))
  counts = dat %>% select(-from) %>% distinct %>% count(id)
  out = dat %>% left_join(counts, by = 'id') %>% mutate(frac = n/ngraphs)
  if(rename) {
    pops = get_leafnames(graphlist[[1]])
    out = map2(graphlist, split(out, out$graph), ~{
    nam = .y %>% transmute(from, to = paste0(from, ' (', round(frac*100), ')')) %>%
      bind_rows(tibble(from = pops, to = pops)) %>% deframe
    .x %>% as_edgelist() %>% as_tibble(.name_repair = ~c('from', 'to')) %>%
      mutate(from = nam[from], to = nam[to]) %>% edges_to_igraph
  })
  }
  out
}


partition_fit = function(fit) {

  leaves = get_leafnames(graph)
  internal = setdiff(names(V(graph)), leaves)
  dest = graph %>% igraph::distances(internal, leaves, mode = 'out') %>% as_tibble(rownames = 'from') %>%
    pivot_longer(-from, names_to = 'to', values_to = 'order') %>% filter(is.finite(order)) %>% select(-order)
}


summarize_driftpartition = function(edges, all = FALSE) {

  graph = edges %>% edges_to_igraph()
  leaves = get_leafnames(graph)
  internal = setdiff(names(V(graph)), leaves)
  desc = graph %>% igraph::distances(internal, leaves, mode = 'out') %>% as_tibble(rownames = 'from') %>%
    pivot_longer(-from, names_to = 'to', values_to = 'order') %>% filter(is.finite(order)) %>%
    select(-order) %>% group_by(from) %>% mutate(id = paste(sort(to), collapse = ' ')) %>% ungroup

  out = edges %>% filter(type == 'edge') %>% select(from, to, weight) %>% left_join(desc %>% select(-to) %>% distinct, by = c('to' = 'from')) %>% mutate(id = ifelse(is.na(id), to, id)) %>% rowwise %>% mutate(id2 = paste0(sort(setdiff(leaves, str_split(id, ' ')[[1]])), collapse = ' ')) %>% ungroup %>% mutate(s1 = pmin(id, id2), s2 = pmax(id, id2)) %>% group_by(s1, s2) %>% summarize(weight = sum(weight), .groups = 'drop') %>% bind_rows(rename(., s1=s2, s2=s1)) %>% rowwise %>% transmute(s1, s2, c1 = str_count(s1, ' ')+1, c2 = str_count(s2, ' ')+1, weight) %>% ungroup
  if(all) {
    x = tibble(s1 = head(power_set(sort(leaves)), -1)) %>% rowwise %>%
      mutate(s2 = list(sort(setdiff(leaves, s1))),
             c1 = length(s1), c2 = length(s2),
             s1 = paste0(s1, collapse = ' '),
             s2 = paste0(s2, collapse = ' '), weight = 0) %>% ungroup
    out = x %>% anti_join(out, by = c('s1', 's2')) %>% bind_rows(out)
  }
  out %>% arrange(s1, s2)
}




graph_boot_score = function(bootfit, graph = NULL, f2_blocks = NULL) {

  if(is.null(bootfit)) bootfit = qpgraph_resample_snps(f2_blocks, boot = 100, graph = graph, numstart = 5, return_fstats = TRUE)
  if(!'f4' %in% names(bootfit)) stop("Need to run with `return_fstats = TRUE`!")
  x = bootfit$f4 %>% bind_rows(.id = 'id') %>% mutate(id = as.numeric(id)) %>%
    filter(pop1 == pop3, pop2 == pop4) %>% select(-pop3, -pop4) %>% arrange(id, pop1, pop2) %>%
    transmute(id, p = paste(pop1, pop2), diff)
  means = x %>% group_by(p) %>% summarize(mn = mean(diff), var = var(diff), .groups = 'drop')

  covmat = x %>% left_join(x, by = 'id') %>%
    group_by(p.x, p.y) %>% summarize(cv = cov(diff.x, diff.y), .groups = 'drop') %>%
    pivot_wider(p.x, names_from = p.y, values_from = cv) %>% select(-p.x) %>% as.matrix

  stat = means$mn %*% (pracma::pinv(covmat)) %*% means$mn
  stat[1,1]

}


graph_boot_pval = function(bootfit) {

  stat = graph_boot_score(bootfit)
  rnk = bootfit$edges[[1]] %>% edges_to_igraph() %>% graph_rank()
  pchisq(stat, rnk, lower.tail = F)
}

#' Convert agraph to igraph
#'
#' `agraph` is the format used by the `admixturegraph` packge. `igraph` is used by the `admixtools` package
#' @export
#' @param agraph An admixture graph in \code{\link{agraph}} format
#' @return An admixture graph in \code{\link{igraph}} format
#' @examples
#' \dontrun{
#' agraph_to_igraph(agraph)
#' }
agraph_to_igraph = function(agraph) {
  igraph::graph_from_adjacency_matrix(agraph$children)
}

#' Convert igraph to agraph
#'
#' `agraph` is the format used by the `admixturegraph` packge. `igraph` is used by the `admixtools` package
#' @export
#' @param agraph An admixture graph in \code{\link{igraph}} format
#' @return An admixture graph in \code{\link{agraph}} format
#' @examples
#' \dontrun{
#' igraph_to_agraph(example_igraph)
#' }
igraph_to_agraph = function(igraph) {

  leaves = get_leafnames(igraph)
  inner_nodes = setdiff(names(V(igraph)), leaves)
  e = igraph %>% as_edgelist() %>% `[`(,c(2,1)) %>% cbind(NA)
  admixturegraph::agraph(leaves, inner_nodes, e)
}



rename_internal = function(graph) {

  isedge = FALSE
  if(!'igraph' %in% class(graph)) {
    isedge = TRUE
    edges = graph
    graph %<>% edges_to_igraph()
  }

  leaves = get_leafnames(graph)
  nammap = graph %>% distances(names(V(graph)), leaves, mode = 'out') %>% as_tibble(rownames = 'from') %>%
    pivot_longer(-from, names_to = 'to', values_to = 'order') %>% filter(is.finite(order)) %>%
    select(-order) %>% group_by(from) %>% mutate(id = paste(sort(to), collapse = '\n')) %>% ungroup %>%
    select(-to) %>% distinct %>% group_by(id) %>%
    mutate(n = 1:n(), id = ifelse(n == n(), id, paste(id, n, sep = '_'))) %>% select(-n) %>% deframe
  if(isedge) {
    edges %<>% mutate(from = nammap[from], to = nammap[to])
    return(edges)
  }
  graph %>% set_vertex_attr('name', value = nammap[names(V(graph))])
}


label_internal = function(edges) {

  # assigns leaf nodes to internal nodes, if the internal node contributes more to one leaf than to all others

  graph = edges %>% edges_to_igraph()
  leaves = get_leafnames(graph)
  internal = setdiff(names(V(graph)), leaves)
  wvec = edges %>% mutate(weight = ifelse(type == 'admix', weight, 1)) %>% transmute(e = paste(from, to, sep = '|'), weight) %>% deframe

  iweights = imap_dfr(internal, ~{
    node = .x
    igraph::all_simple_paths(graph, node, leaves) %>%
      set_names(map_chr(., ~attr(tail(., 1), 'name'))) %>%
      map(admixtools:::vs_to_es) %>% map(~wvec[.]) %>% map(prod) %>%
      enframe %>% unnest(value) %>% group_by(name) %>%
      summarize(weight = sum(value)) %>% transmute(from = node, to = name, weight)
  })

  int_labels = iweights %>% complete(to, from, fill = list(weight = 0)) %>% group_by(from) %>% arrange(-weight) %>% slice_head(n = 2) %>% mutate(diff = weight[1]-weight[2]) %>% filter(diff > 0) %>% slice_head(n=1) %>% ungroup

}

leaf_internal_ancestry = function(edges) {

  # returns data frame with all leaf nodes with ancestry that has been mapped to
  # another leaf node by 'label_internal'

  graph = edges %>% edges_to_igraph()
  leaves = get_leafnames(graph)
  internal = setdiff(names(V(graph)), leaves)
  root = get_rootname(graph)

  int_labels = label_internal(edges)
  map_dfr(leaves, ~{
    node = .
    x = igraph::all_simple_paths(graph, root, node) %>% unlist %>% names %>% unique
    from = int_labels %>% filter(to != node, from %in% x) %>% pull(to) %>% unique
    tibble(from = from, to = node)
  })
}

#' Count zero-length edges
#'
#' `agraph` is the format used by the `admixturegraph` packge. `igraph` is used by the `admixtools` package
#' @param edges Edges data frame from fitted admixture graph
#' @param epsilon Every edge with length
#' @return The number of edges with length < epsilon
#' @examples
#' \dontrun{
#' fit = qpgraph(example_f2_blocks, example_igraph)
#' count_zero_edges(fit$edges)
#' }
count_zero_edges = function(edges, epsilon = 1e-6) {

  if(!'type' %in% names(edges)) stop("'edges' should be a data frame with a column named 'type'!")
  if(!'weight' %in% names(edges)) stop("'edges' should be a data frame with a column named 'weight'!")

  edges %>%
    filter(weight < epsilon, type == 'edge') %>%
    nrow()
}
uqrmaie1/admixtools documentation built on April 25, 2024, 11:30 a.m.