R/utils.R
In FamAgg: Pedigree Analysis and Familial Aggregation

Documented in connectedSubgraph ped2graph subPedigree

## just simple utility function to test for empty/not set column names
noColNames <- function(x){
    CN <- colnames(x)
    all(CN %in% paste("V", 1:ncol(x), sep=""))
}

## takes a pedigree (data.frame, pedigree or pedigreeList) as input and
## generates a valid pedigree for the submitted ids (if no ids are specified
## it returns the ped). basically, the function subsets the full pedigree to
## the individuals ids and recursively adds parents if needed.
buildPedigree <- function(ped, ids){
    ## check pedigree and eventually add missing parents...
    if(missing(ids))
        return(ped)
    ## want to have a data.frame or matrix...
    if(is(ped, "pedigree") | is(ped, "pedigreeList"))
        ped <- ped2df(ped)
    if(!all(ids %in% rownames(ped)))
        stop("Some of the submitted ids are not in the pedigree!")
    subfam <- ped[as.character(ids), ]
    ## now recursively add parents, if needed...
    parents <- as.character(unique(c(subfam$father, subfam$mother)))
    ## parents[is.na(parents)] <- 0
    ## parents <- parents[parents!=0]
    parents <- parents[!is.na(parents)]
    while(!all(parents %in% as.character(subfam$id))){
        subfam <- ped[unique(c(as.character(subfam$id), parents)), ]
        parents <- as.character(unique(c(subfam$father, subfam$mother)))
        ## parents[is.na(parents)] <- 0
        ## parents <- parents[parents!=0]
        parents <- parents[!is.na(parents)]
    }
    subfam
}


doGetSiblings <- function(ped, id = NULL) {
    if(is.null(id))
        stop("At least one if has to be specified!")
    parents <- doGetParents(ped, id = id)
    siblings <- doGetChildren(ped, id = parents, maxlevel = 1)
    siblings
}

## gets all the ancestors for (one or more) id(s)
doGetAncestors <- function(ped, id = NULL, maxlevel = 3) {
    if (is.null(id))
        stop("At least one id has to be specified!")
    id <- as.character(id)
    allids <- NULL
    for (i in 1:maxlevel) {
        ## get the parents of all the allids
        newids <- unlist(
            ped[which(ped$id %in% id), c("father", "mother")],
            use.names = FALSE
        )
        zeros <- is.na(newids)
        ##zeros <- newids == founder
        if(all(zeros) | length(newids)==0)
            break
        newids <- newids[!zeros]
        id <- newids
        allids <- c(allids, newids)
    }
    as.character(unique(allids))
}

doGetParents <- function(ped, id = NULL) {
    doGetAncestors(ped = ped, id = id, maxlevel = 1)
}

## gets all the children for (one or more) id(s)
## note: these are only children in direct blood line.
doGetChildren <- function(ped, id=NULL, maxlevel=16){
    if(is.null(id))
        stop("At least one id has to be specified!")
    allids <- NULL
    for(i in 1:maxlevel){
        ## get all the children for the ids
        newids <- ped[which(ped$father %in% id | ped$mother %in% id), "id"]
        if(length(newids)==0)
            break
        id <- newids
        allids <- c(allids, newids)
    }
    as.character(unique(allids))
}

## check for the submitted id(s) whether a mate (spouse) is
## available (in column mother or father of the pedigree ped)
## and if yes add them to the returned list of ids.
## returned value contains
## eventually missing mates.
doGetMissingMate <- function(ped, id) {
    if(is.null(id))
        stop("Al least one id has to be specified")
    newids <- c(ped[which(ped$father %in% id), "mother"],
                ped[which(ped$mother %in% id), "father"])
    unique(newids)
}

writePed <- function(ped, file){
    Sexes <- as.numeric(sanitizeSex(ped$sex))
    Sexes[is.na(Sexes)] <- 0
    ped$sex <- Sexes
    write.table(ped, file=file, quote=FALSE, sep="\t", row.names=FALSE,
                col.names=FALSE)
}

## creates a data.frame (or matrix) for a pedigree...
ped2df <- function(ped){
    ped <- do.call(cbind, ped)
    if(!any(colnames(ped)=="famid"))
        ped <- cbind(ped, famid=rep(1, nrow(ped)))
    ped <- ped[ , c("famid", "id", "findex", "mindex", "sex")]
    ## FIXME!!! Can I get NAs as findex too?
    ## have to rematch the fater id and mother id...
    notZ <- which(ped[, "findex"] > 0)
    ped[notZ, "findex"] <- ped[ped[notZ, "findex"], "id"]
    notZ <- which(ped[, "findex"] > 0)
    ped[notZ, "mindex"] <- ped[ped[notZ, "mindex"], "id"]
    colnames(ped) <- .PEDCN
    ped <- data.frame(ped)
    rownames(ped) <- ped$id
    ped
}


df2ped <- function(ped){
    ped <- checkPedCol(ped)
    if(!any(colnames(ped)=="family"))
        ped <- cbind(ped, family=rep(1, nrow(ped)))
    ## if there is only one family, remove that one ;)
    if(length(unique(ped$family)) == 1)
        ped <- ped[, colnames(ped)!="family"]
    ##
    if(any(colnames(ped) == "affected")){
        if(any(colnames(ped) == "family")){
            peddi <- kinship2::pedigree(id=ped$id, dadid=ped$father,
                                        momid=ped$mother, famid=ped$family,
                                        sex=as.numeric(ped$sex),
                                        affected=ped$affected)
        }else{
            peddi <- kinship2::pedigree(id=ped$id, dadid=ped$father,
                                        momid=ped$mother,
                                        sex=as.numeric(ped$sex),
                                        affected=ped$affected)
        }
    }else{
        if(any(colnames(ped) == "family")){
            peddi <- kinship2::pedigree(id=ped$id, dadid=ped$father,
                                        momid=ped$mother, famid=ped$family,
                                        sex=as.numeric(ped$sex))
        }else{
            peddi <- kinship2::pedigree(id=ped$id, dadid=ped$father,
                                        momid=ped$mother,
                                        sex=as.numeric(ped$sex))
        }
    }
    peddi
}


## finds related individuals, i.e. individuals sharing the same blood line.
## should we call this "shareBloodLine" or "shareKinship"
## ped has to be the data.frame that we get with the pedigree method.
doShareKinship <- function(ped, kin, id){
    if(missing(ped) & missing(kin))
        stop("ped or kin has to be submitted!")
    if(missing(id))
        stop("At least one id has to be submitted!")
    id <- as.character(id)
    ## calculate kin from ped
    if(missing(kin)){
        kin <- kinship(id=ped$id, momid=ped$mother, dadid=ped$father,
                    sex=ped$sex)
    }
    if(!all(id %in% colnames(kin)))
        stop("id has to be present in ped or kin!")
    ## subset to id (which can be more than one)
    kin <- kin[id, , drop=FALSE]
    related <- colnames(kin)[colSums(kin) > 0]
    related
}

####============================================================
##  doGetFounders
##
##  Returns the ids of the founders in the specified pedigree.
####------------------------------------------------------------
doGetFounders <- function(ped){
    ped[is.na(ped$mother) & is.na(ped$father), "id"]
}

####============================================================
##  doGetSingletons
##
##  Return the ids of those founders that have no child in the
##  pedigree.
####------------------------------------------------------------
doGetSingletons <- function(ped){
    founders <- ped[is.na(ped$mother) & is.na(ped$father), , drop=FALSE]
    if(nrow(founders) > 0){
        childl <- !((founders$id %in% ped$father) |
                    (founders$id %in% ped$mother))
        if(any(childl)){
            return(founders[childl, "id"])
        }
    }
    character()
}


## gets all parents.
getParents <- function(ped, id){
    if(is(ped, "pedigree") | is(ped, "pedigreeList"))
        ped <- ped2df(ped)
    ped <- checkPedCol(ped)
    if(missing(id))
        stop("id is missing!")
    if(is.null(rownames(ped)))
        rownames(ped) <- ped[, "id"]
    id <- as.character(id)
    if(!is.data.frame(ped))
        ped <- as.data.frame(ped)
    ped[id, c("father", "mother"), drop=FALSE]
}


## just a simple helper function that checks that the ped parameter
## has the required colnames, is a data.frame and has rownames set.
checkPedCol <- function(ped){
    if(missing(ped))
        stop("ped is required!")
    if(!all(c("id", "father", "mother") %in% colnames(ped)))
        stop("id, father, mother are required columns in ped.")
    if(!is.data.frame(ped))
        ped <- as.data.frame(ped)
    ## add a family column if not present...
    if(!any(colnames(ped) == "family"))
        ped <- cbind(ped, family=rep(1, nrow(ped)))
    rownames(ped) <- as.character(ped$id)
    ped
}

## Sanitize the pedigree the way we want: sex should be sanitized (if present),
## father and mother column should contain NA for founders instead of "" or 0.
sanitizePed <- function(ped){
    if(any(colnames(ped) == "sex"))
        ped$sex <- sanitizeSex(ped$sex)
    Father <- ped$father
    if(is.factor(Father))
        Father <- as.character(Father)
    if(is.numeric(Father))
        Father[which(Father == 0)] <- NA
    if(is.character(Father))
        Father[which(Father == "")] <- NA
    Mother <- ped$mother
    if(is.factor(Mother))
        Mother <- as.character(Mother)
    if(is.numeric(Mother))
        Mother[which(Mother == 0)] <- NA
    if(is.character(Mother))
        Mother[which(Mother == "")] <- NA
    ped$father <- Father
    ped$mother <- Mother
    ped
}

#' find the subPedigree (smallest pedigree) including the individuals specified
#' with id and all eventually needed additionals
#'
#' @param ped pedigree `data.frame`.
#'
#' @param id `character` with the IDs of the individuals.
#'
#' @param all `logical(1)`
#'
#' @md
#'
#' @noRd
#'
#' @author Johannes Rainer
subPedigree <- function(ped, id = NULL, all = TRUE) {
    CL <- class(ped)
    ## handling for pedigree and pedigreeList classes: unfortunately these
    ## are not correct classes, otherwise I could define methods instead of
    ## a function...
    if (is(ped, "pedigree") | is(ped, "pedigreeList")) {
        ped <- ped2df(ped)
    }
    ped <- checkPedCol(ped)
    if (is.null(id))
        stop("id is missing!")
    id <- as.character(id)
    subgr <- connectedSubgraph(ped2graph(ped), nodes = id, mode = "all",
                               all.nodes = all, ifnotfound = NA)
    if (!is(subgr, "igraph")) {
        stop("No common pedigree for the submitted individuals found!")
    }
    inds <- names(V(subgr))
    ## get missing mates
    inds <- unique(c(inds, doGetMissingMate(ped, id = inds)))
    subped <- ped[inds, ]
    subped <- removeSingletons(subped)
    ## fix founders.
    subped[!(subped$father %in% subped$id), "father"] <- NA
    subped[!(subped$mother %in% subped$id), "mother"] <- NA
    ## Fix if we have individuals with a mother or a father, but not both.
    idx <- which(is.na(subped$father) != is.na(subped$mother))
    if (length(idx)) {
        idx_ped <- match(subped$id[idx], ped$id)
        id_msng <- ped$mother[idx_ped][is.na(subped$mother[idx])]
        id_msng <- c(id_msng, ped$father[idx_ped][is.na(subped$father[idx])])
        subped$father[idx] <- ped$father[idx_ped]
        subped$mother[idx] <- ped$mother[idx_ped]
        if (length(id_msng)) {
            subped <- rbind(subped, ped[match(id_msng, ped$id), ])
            subped[!(subped$father %in% subped$id), "father"] <- NA
            subped[!(subped$mother %in% subped$id), "mother"] <- NA
        }
    }
    if(CL == "pedigree" | CL == "pedigreeList")
        subped <- df2ped(subped)
    subped
}


## find the (eventually smallest) connected subgraph of all specified
## nodes.
## graph: igraph object.
## nodes: node (vertex) names.
## mode: to calculate the smallest distance.
## all.nodes: if all nodes should be present in the resulting graph.
## ifnotfound: by default, the function throws an error if no connected
##             subgraph is found, or, if all.nodes=TRUE, not all input nodes
##             are in the connected subgraph. If ifnotfound is specified, its
##             value is returned instead of an error.
connectedSubgraph <- function(graph, nodes, mode="all", all.nodes=TRUE,
                              ifnotfound){
    if(!is(graph, "igraph"))
        stop("graph has to be an igraph object!")
    mode <- match.arg(mode, c("all", "in", "out"))
    if(missing(nodes) | length(nodes) < 2)
        stop("At least two nodes have to be submitted!")
    nodes <- as.character(nodes)
    ## check if nodes are present in the graph
    nono <- !(nodes %in% names(V(graph)))
    if(any(nono)){
        missingNodes <- nodes[nono]
        if(all.nodes)
            stop("With argument all.nodes=TRUE all nodes have to be present ",
                 "in the graph! Nodes ", paste(missingNodes, collapse=", "),
                 " not present in the original graph.")
        nodes <- nodes[!nono]
        warning("Nodes ", paste(missingNodes, collapse=", "),
                " removed as they are not present in graph! ")
        if(length(nodes) < 2)
            stop("At least two nodes have to be present in the graph!")
    }
    ## OK, now we can start...
    ## 1) iterate over all nodes and calculate the shortest path between all.
    ## 2) get all nodes along the shortest path and add to the needed nodes.
    needNodes <- character()
    ## two versions: if mode="all" the directionality does not matter, thus we
    ## can run a faster version. for the other modes really calculate distances
    ## between all (both directions).
    if(mode == "all"){
        for(i in 1:(length(nodes)-1)){
            suppressWarnings(
                paths <- shortest_paths(graph, from=nodes[i],
                                        to=nodes[(i+1):length(nodes)],
                                        mode=mode)
            )
            needNodes <- unique(c(needNodes,
                                  unlist(lapply(paths$vpath, names))))
        }
    }else{
        for(i in 1:length(nodes)){
            suppressWarnings(
                paths <- shortest_paths(graph, from=nodes[i],
                                        to=nodes[-i],
                                        mode=mode)
            )
            needNodes <- unique(c(needNodes,
                                  unlist(lapply(paths$vpath, names))))
        }
    }
    if(length(needNodes)==0){
        if(missing(ifnotfound)){
            stop("Could not find any connected subgraph!")
        }
        warning("Could not find any connected subgraph!")
        return(ifnotfound)
    }
    ## check the subgraph.
    if(!all(nodes %in% needNodes)){
        if(all.nodes){
            if(missing(ifnotfound)){
                stop("Could not find any connected subgraph!")
            }
            warning("Could not find any connected subgraph!")
            return(ifnotfound)
        }else{
            warning("Nodes ", paste(nodes[!(nodes %in% needNodes)]),
                    " not present in the returned graph!")
        }
    }
    ## finally build the graph...
    ## subgraph is a little tedious as it requires submitting of vertex INDICES!
    induced_subgraph(graph, vids=needNodes)
}

## search for a common ancestor couple in a pedigree.
## ped: data.frame or pedigree or pedigreeList.
doGetCommonAncestor <- function(ped, id, method="min.dist"){
    method <- match.arg(method, c("min.dist", "smallest.mean.dist"))
    if(missing(id))
        stop("id has to be specified")
    if(length(id) <= 1)
        stop("The minimum number of ids has to be 2.")
    if(is(ped, "pedigree") | is(ped, "pedigreeList"))
        ped <- ped2df(ped)
    ped <- checkPedCol(ped)
    ## check if id are in ped:
    id <- as.character(id)
    if(!all(id %in% rownames(ped))){
        stop("all of the sumitted ids should be present in the pedigree!")
    }
    ## check if the ids are in the same family
    if(any(colnames(ped)=="family")){
        if(length(unique(ped[id, "family"])) > 1){
            warning("The provided ids are from different families; still",
                    " trying to find a common ancestor but I'll probably fail.")
        }else{
            ## subset the pedigree to this family...
            ped <- ped[ped$family == ped[id, "family"][1], , drop=FALSE]
        }
    }
    ## OK; done with input parameter checking.
    ## transform the pedigree into a graph
    igr <- ped2graph(ped)
    ## "in": go from children -> parents
    ## LLLL TODO can do faster!!!
    din <- distances(igr, mode="in")
    ## use the ROWS in that.
    subD <- din[id, , drop=FALSE]
    subD <- subD[, !apply(subD, MARGIN=2, function(x){
        return(all(infOrZero(x)))
    }) , drop=FALSE]
    ## check if we've got one common ancestor:
    if(all(is.infinite(colSums(subD)))){
        warning("Did not find any common ancestor!")
        return(NA)
    }
    ## remains to find the "one and only" (well, usually we expect a couple).
    subD <- subD[, !is.infinite(colSums(subD)), drop=FALSE]
    ## now get the ones that are closest:
    if(method == "min.dist"){
        dists <- apply(subD, MARGIN=2, min)
        ancs <- names(dists)[dists==min(dists)]
    }
    if(method == "smallest.mean.dist"){
        dists <- colMeans(subD)
        ancs <- names(dists)[dists==min(dists)]
    }
    ## at last trying to get the mate... if any; usually, ancs should
    ## already correspond to the ancestor couple.
    ancs <- c(ancs, doGetMissingMate(ped, ancs))
    unique(ancs)
}

## returns true if a value is 0 or infinite
infOrZero <- function(x){
    (is.infinite(x) | x == 0)
}

## transform a pedigree into a (directed) igraph
## direction is always parent->child.
## this time each individual is treated as a single node!
ped2graph <- function(ped){
    if(is(ped, "pedigree") | is(ped, "pedigreeList"))
        ped <- ped2df(ped)
    ped <- checkPedCol(ped)
    ## define end/starting points
    ped[!ped[, "father"] %in% ped$id, "father"] <- NA
    ped[!ped[, "mother"] %in% ped$id, "mother"] <- NA
    eds <- apply(ped[, c("id", "father", "mother")], MARGIN=1, function(x){
        retval <- c()
        if(!is.na(x["father"]))
            retval <- x[c("father", "id")]
        if(!is.na(x["mother"]))
            retval <- c(retval, x[c("mother", "id")])
        return(retval)
    })
    ## what the heck... I need character vertex names!
    Edges <- unlist(eds, use.names=FALSE)
    igr <- graph(edges=as.character(Edges))
    igr
}


## takes a pedigree as input and tries to identify the founders
## in case there are more than two founder pairs (i.e. founders with the same,
## largest number of offspring generations), it returns only one of them.
doFindFounders <- function(ped, family, id){
    if(!any(colnames(ped) == "family"))
        ped <- data.frame(ped, family=rep(1, nrow(ped)), stringsAsFactors=FALSE)
    families <- unique(as.character(ped$family))
    if(missing(family))
        family <- NULL
    if (missing(id))
        id <- NULL
    if(is.null(family) & is.null(id)){
        family <- families[1]
        warning("No family specified; using the first one in the pedigree: ",
                family, ".")
    }
    if (!is.null(family))
        ped <- ped[ped$family == family, , drop=FALSE]
    if (!is.null(id)) {
        return(doFindFoundersForId(ped = ped, id = id))
    } else {
        founders <- ped$id[which(is.na(ped$father) & is.na(ped$mother))]
        ## determine for each founder the number of generations of its children,
        ## grandchildren etc.
        founderGen <- doCountGenerations(ped, id=founders, direction="down")
        founders <- names(founderGen)[founderGen == max(founderGen)][1]
        founders <- c(founders, doGetMissingMate(ped, founders))
        return(unique(founders))
    }
}

##' Find founders for the pedigree of a given individual. Have to first build
##' the largest pedigree for the given individual and identify the founders in
##' that pedigree.
##'
##' @note The function returns only a single founder pair, i.e. the one with the
##' largest number of offspring generations.
##' @noRd
doFindFoundersForId <- function(ped, id) {
    if (length(id) > 1) {
        id <- id[1]
        warning("length of 'id' was > 1, but only the first id was selected!")
    }
    ## Check if we've got this id in the pedigree
    if (!any(ped[, "id"] == id))
        stop("Individual with id '", id, "' not found in the pedigree!")
    ## Building the pedigree for that individual.
    ids <- unique(c(doGetAncestors(ped = ped, id = id, maxlevel = 200), id))
    ids <- unique(c(ids, doGetChildren(ped = ped, id = ids, maxlevel = 200)))
    ids <- unique(c(ids, doGetMissingMate(ped, id = ids)))
    ped <- ped[ped[, "id"] %in% ids, , drop = FALSE]
    ## OK, now we have the pedigree for the individual, now find founders.
    founders <- ped[which(is.na(ped[, "father"]) &
                          is.na(ped[, "mother"])), "id"]
    ## determine for each founder the number of generations of its children,
    ## grandchildren etc.
    founderGen <- doCountGenerations(ped, id = founders, direction = "down")
    founders <- names(founderGen)[founderGen == max(founderGen)][1]
    founders <- c(founders, doGetMissingMate(ped, founders))
    unique(founders)
}

## calculate for each id the number of generations up- or down.
doCountGenerations <- function(ped, id=NULL, direction="down"){
    if(is.null(id))
        stop("At least one id has to be specified!")
    direction <- match.arg(direction, c("up", "down"))
    if(direction == "up")
        walkFun <- doGetAncestors
    if(direction == "down")
        walkFun <- doGetChildren
    res <- sapply(id, function(x){
        generation <- 0
        ids <- x
        repeat{
            ids <- walkFun(ped, id=ids, maxlevel=1)
            if(length(ids) == 0){
                return(generation)
            }else{
                generation <- generation + 1
            }
        }
    })
    names(res) <- id
    res
}

## returns a list of generations for each family in the pedigree.
## generations are relative to the founders.
doEstimateGenerationsFor2 <- function(ped, family=NULL){
    if(!is.null(family)){
        ped <- ped[ped$family %in% family, ]
        if(nrow(ped) == 0)
            stop("The submitted family id is not present in the pedigree!")
    }
    ## OK, now loop through the families...
    pedList <- split(ped, ped$family)
    Gens <- lapply(pedList, function(currentPed){
        founders <- doFindFounders(currentPed, family=currentPed[1, "family"])
        generation <- doGetGenerationFrom2(currentPed, founders[1])
        return(generation)
    })
    Gens
}

getAllSibsNMates <- function(ped, id){
    ## recursively fetch siblings and mates
    sibsNmates <- id
    repeat{
        sibs <- unique(c(id, doGetSiblings(ped, id=id)))
        sibsNmates <- unique(c(doGetMissingMate(ped, id=sibs), sibs))
        if(length(sibsNmates) != length(id)){
            id <- sibsNmates
        }else{
            break
        }
    }
    sibsNmates
}

doGetGenerationFrom2 <- function(ped, id, generation=0){
    if(missing(id))
        stop("id has to be specified!")
    if(missing(ped))
        stop("ped has to be specified!")
    id <- as.character(id)
    if(length(id) > 1){
        id <- id[1]
        warning("Length of 'id' is larger 1. Using only the first id ",
                "in 'id': ", id)
    }
    if(!any(ped$id %in% id))
        stop("Can not find id in the pedigree!")
    famid <- ped[ped$id == id, "family"]
    ped <- ped[ped$family == famid, , drop=FALSE]
    ## first define the connected!!! pedigree.
    pedg <- ped2graph(ped)
    if(clusters(pedg)$no > 1){
        Cl <- clusters(pedg)
        warning("Found ", Cl$no, " connected sub-graphs in the pedigree ",
                "for family ", famid, "! Will restrict the analysis to the ",
                "subgraph containing individual ", id, ".")
        vertices <- names(Cl$membership)[Cl$membership == Cl$membership[id]]
        pedg <- induced_subgraph(pedg, vids=vertices)
    }
    individuals <- names(V(pedg))
    gens4individuals <- rep(NA, length(individuals))
    names(gens4individuals) <- individuals
    if(!any(names(gens4individuals) == id))
        stop("Individual ", id, " is not part of the connected pedigree!")
    gens4individuals[id] <- 0
    fromId <- id
    Tested <- rep(FALSE, length(gens4individuals))
    repeat{
        ## get the generation of the individuals from which we start
        fromIdBool <- individuals %in% fromId
        Tested[fromIdBool] <- TRUE
        currentGens <- gens4individuals[fromIdBool]
        currentGens <- split(currentGens, currentGens)
        Res <- lapply(currentGens, allids=individuals, FUN=function(z, allids){
            ## get the ancestors and set the generation for them
            ancs <- getAncestors(ped, id=names(z), max.generations=1)
            if(length(ancs) > 0){
                ancret <- cbind(which(allids %in% ancs), (z[1] - 1))
            }else{
                ancret <- NULL
            }
            ## get the children
            kids <- getChildren(ped, id=names(z), max.generation=1)
            if(length(kids) > 0){
                kidret <- cbind(which(allids %in% kids), (z[1] + 1))
            }else{
                kidret <- NULL
            }
            return(rbind(ancret, kidret))
        })
        Res <- do.call(rbind, Res)
        if(nrow(Res) == 0){
            warning("What the heck... stopped before we got all?")
            break
        }
        ## don't want to search the same id several times... if Tested is
        ## TRUE, don't use them again.
        toTestIdx <- Res[, 1][!Tested[Res[, 1]]]
        fromId <- individuals[toTestIdx]
        gens4individuals[Res[, 1]] <- Res[, 2]
        if(sum(is.na(gens4individuals)) == 0)
            break
    }
    ## make a larger generation vector.
    allgens <- rep(NA, nrow(ped))
    names(allgens) <- ped$id
    allgens[names(gens4individuals)] <- gens4individuals
    allgens
}

##********************************************************************
##
##   Utility functions
##
##********************************************************************
sanitizeSex <- function(x){
    if(is.numeric(x)){
        ## require values 1, 2, NA
        numrange <- unique(x)
        if(!all(c(1, 2) %in% numrange))
            stop("If column sex is numeric it has to contain 1 (=male) ",
                 "and 2 (=female) values! Unknown or missing can be encoded",
                 " by NA or any other number than 1 or 2.")
        news <- rep(NA, length(x))
        news[which(x == 1)] <- "M"
        news[which(x == 2)] <- "F"
        return(factor(news, levels=c("M", "F")))
    }
    if(is.factor(x))
        x <- as.character(x)
    if(is.character(x)){
        ## remove all white spaces:
        x <- gsub(x, pattern="\\s", replacement="")
        ## that's tricky...kind of...
        ## just get the first character; want to have M and F...
        x <- substr(toupper(x), 1, 1)
        chars <- unique(x)
        if(length(chars) == 2){
            if(!all(c("M", "F") %in% chars))
                stop("Male and female individuals have to be represented",
                     " by 'M' and 'F', respectively!")
        }
        if(length(chars) == 1){
            if(!any(c("M", "F") %in% chars))
                stop("Male and female individuals have to be represented",
                     " by 'M' and 'F', respectively!")
        }
        if(length(chars[!is.na(chars)]) > 2)
            warning("All characters in column sex other than M and F",
                    " (or male, female etc) are set to NA!")
        news <- rep(NA, length(x))
        news[which(x == "M")] <- "M"
        news[which(x == "F")] <- "F"
        return(factor(news, levels=c("M", "F")))
    }
}


#####
#### NOTE:
##   isn't that the same as removeSingletons???
##
####
## remove non-connected individuals. We're assuming that ped is the pedigree
## of a SINGLE family, as we expect to get ONE graph. in case we've got 2, we're
## using the largest connected one.
## solveMultiGraph: what should we do if we get multiple graphs?
##    + use.all: just use all, i.e. return vertix names of all.
##    + use.largest: just return vertices of the largest connected sub-graph.
doPrunePed <- function(ped, addMissingMates=FALSE, solveMultiGraph="use.all"){
    .Deprecated("removeSingletons")
    solveMultiGraph <- match.arg(solveMultiGraph, c("use.all", "use.largest"))
    pedg <- ped2graph(ped)
    if(solveMultiGraph == "use.largest"){
        if(clusters(pedg)$no > 1){
            Cl <- clusters(pedg)
            warning("Found ", Cl$no,
                    " connected sub-graphs in the pedigree! Will restrict ",
                    "the analysis to the largest connected pedigree",
                    " in that family.")
            vertices <-
                names(Cl$membership)[Cl$membership ==
                                     as.numeric(sort(table(Cl$membership),
                                                     decreasing=TRUE))[1]]
            pedg <- induced_subgraph(pedg, vids=vertices)
        }
    }
    keepIds <- names(V(pedg))
    if(addMissingMates){
        missMates <- doGetMissingMate(ped, id=keepIds)
        keepIds <- unique(c(missMates, keepIds))
    }
    notThere <- !(as.character(ped$id) %in% keepIds)
    ped <- ped[!notThere, , drop=FALSE]
    if(any(notThere))
        warning("Had to remove ", sum(notThere),
                " un-connected individuals from the pedigree.")
    ped
}


## a singleton a.k.a childless founder is an individual which id is not
## present in the father or mother column of the pedigree and which has 0
## in mother and father
.removeSingletons <- function(ped){
    message("Removing singletons...", appendLF=FALSE)
    ped <- checkPedCol(ped)
    ped <- sanitizePed(ped)
    childlF <- doGetSingletons(ped)
    if(length(childlF) > 0){
        ped <- ped[!(ped$id %in% childlF), ]
        message(" ", length(childlF), " removed.")
    }else{
        message(" none present.")
    }
    ped
}