R/find.gene.pseudomarker.R

Defines functions find.gene.pseudomarker

Documented in find.gene.pseudomarker

## find.gene.pseudomarker.R
## Karl W Broman

# find.gene.pseudomarker:
#' Find nearest peudomarker to each gene
#'
#' Pull out the pseudomarker that is closest to the position of each of a
#' series of genes.
#'
#' We first convert positions (by interpolation) from those contained within
#' `cross` to physical coordinates contained in `pmap`.  We then use
#' [qtl::find.pseudomarker()] to identify the closest pseudomarker to
#' each gene location.
#'
#' We also include the positions of the pseudomarkers, and we print a warning
#' message if pseudomarkers are > 2 Mbp from the respective gene.
#'
#' @param cross An object of class `"cross"` containing data for a QTL
#' experiment.  See the help file for [qtl::read.cross()] in the
#' R/qtl package (<https://rqtl.org>).
#' @param pmap A physical map of the markers in `cross`, with locations in
#' Mbp.  This is a list whose components are the marker locations on each
#' chromosome.
#' @param geneloc A data frame specifying the physical locations of the genes.
#' There should be two columns, `chr` for chromosome and `pos` for
#' position in Mbp.  The rownames should indicate the gene names.
#' @param where Indicates whether to pull pseudomarkers from the genotype
#' probabilities (produced by [qtl::calc.genoprob()]) or from the
#' imputed genotypes (produced by [qtl::sim.geno()]).
#' @return A data frame with columns `chr` (the chromosome) and
#' `pmark` (the name of the pseudomarker).  The third column `pos`
#' contains the Mbp position of the pseudomarker.  The final column is the
#' signed distance between the gene and the pseudomarker.  The rownames
#' indicate the gene names.
#' @author Karl W Broman, \email{broman@@wisc.edu}
#' @seealso [qtl::find.pseudomarker()],
#' [qtl::find.pseudomarkerpos()], [plotEGclass()],
#' [disteg()], [calc.locallod()]
#' @keywords utilities
#' @examples
#' data(f2cross, expr1, genepos, pmap)
#' library(qtl)
#' \dontshow{
#' n_ind <- 20
#' n_genes <- 5
#' f2cross <- f2cross[,1:n_ind]
#' expr1 <- expr1[1:n_ind,1:n_genes]
#' genepos <- genepos[1:n_genes,]}
#' # calc QTL genotype probabilities
#' f2cross <- calc.genoprob(f2cross, step=1)
#'
#' # find nearest pseudomarkers
#' pmark <- find.gene.pseudomarker(f2cross, pmap, genepos, "prob")
#'
#' @export
find.gene.pseudomarker <-
    function(cross, pmap, geneloc, where=c("prob", "draws"))
{
    where <- match.arg(where)
    if(!(where %in% names(cross$geno[[1]])))
        stop("You first need to run ", ifelse(where=="prob", "calc.genoprob", "sim.geno"), ".")

    cross <- qtl::replacemap(cross, pmap)
    res <- data.frame(chr=geneloc$chr,
                      pmark=qtl::find.pseudomarker(cross, geneloc$chr, geneloc$pos, where, addchr=FALSE),
                      stringsAsFactors=FALSE)

    rownames(res) <- rownames(geneloc)

    pmark <- res$pmark
    gr <- grep("^loc[0-9]+\\.*[0-9]*(\\.[0-9]+)*$", pmark)
    if(length(gr)>0)
        pmark[gr] <- paste("c", res$chr[gr], ".", pmark[gr], sep="")
    upmark <- unique(pmark)
    thepos <- qtl::find.pseudomarkerpos(cross, upmark, where)
    res$pos <- thepos[match(pmark, rownames(thepos)),2]

    res <- cbind(res, dist.from.gene=(d <- geneloc$pos - res$pos))
    d <- abs(d)
    if(any(d > 2)) {
        ngap <- sum(d>2)
        maxd <- max(d)
        warning(ngap, " genes differ from pseudomarker pos by > 2 Mbp, with gaps as big as ", round(maxd, 1), " Mbp")
    }

    res
}
kbroman/lineup documentation built on May 10, 2023, 6:02 p.m.