R/markers.R

Defines functions .setSNPfreqs modifyMarkerMatrix swapGenotypes removeMarkers getMarkers modifyMarker .prettyMarkers .checkMutationMatrix .createMarkerObject setMarkers addMarker marker

Documented in addMarker getMarkers marker modifyMarker modifyMarkerMatrix removeMarkers setMarkers swapGenotypes

#' Marker functions
#'
#' Functions for setting and manipulating marker genotypes for 'linkdat'
#' objects.
#'
#' @param x a \code{\link{linkdat}} object
#' @param ...  an even number of vectors, indicating individuals and their
#'   genotypes. See examples.
#' @param allelematrix a matrix with one row per pedigree member and two columns
#'   per marker, containing the alleles for a single marker.
#' @param m a \code{marker} object or a matrix with alleles. (In
#'   \code{setMarkers} this matrix can contain data of several markers.)
#' @param missing a numeric - or character - of length 1, indicating the code
#'   for missing alleles.
#' @param chrom NA or an integer (the chromosome number of the marker).
#' @param pos NA or a non-negative real number indicating the genetic position
#'   (in cM) of the marker.
#' @param name NA or a character (the name of the marker).
#' @param mutmat a mutation matrix, or a list of two such matrices named
#'   'female' and 'male'. The matrix/matrices must be square, with the allele
#'   labels as dimnames, and each row must sum to 1 (after rounding to 3
#'   decimals).
#' @param annotations a list of marker annotations.
#' @param markernames NULL or a character vector.
#' @param chroms NULL or a numeric vector of chromosome numbers.
#' @param fromPos,toPos NULL or a single numeric.
#' @param marker,markers a numeric indicating which marker(s) to use/modify.
#' @param ids a numeric indicating individual(s) to be modified. In
#'   \code{swapGenotypes} this must have length 2.
#' @param genotype a vector of length 1 or 2, containing the genotype to be
#'   given the \code{ids} individuals. See examples.
#' @param alleles a numeric or character vector containing allele names.
#' @param afreq a numerical vector with allele frequencies. An error is given if
#'   they don't sum to 1 (rounded to 3 decimals).
#' @param new.alleles a numerical matrix of dimensions \code{length(ids),
#'   2*x$nMark}. Entries refer to the internal allele numbering.
#'
#' @return The \code{marker} function returns an object of class \code{marker}:
#'   This is a numerical 2-column matrix with one row per individual, and
#'   attributes 'alleles' (a character vector with allele names), 'nalleles'
#'   (the number of alleles) and 'missing' (the input symbol for missing marker
#'   alleles), 'chrom' (chromosome number), 'name' (marker identifier), 'pos'
#'   (chromosome position in cM).
#'
#'   For \code{addMarker}, \code{setMarkers}, \code{removeMarkers},
#'   \code{modifyMarker}, \code{modifyMarkerMatrix} and \code{swapGenotypes}, a
#'   \code{linkdat} object is returned, whose \code{markerdata} element has been
#'   set/modified.
#'
#'   For \code{getMarkers} a numeric vector containing marker numbers (i.e.
#'   their indices in \code{x$markerdata}) for the markers whose 'name'
#'   attribute is contained in \code{markernames}, 'chrom' attribute is
#'   contained in \code{chroms}, and 'pos' attribute is between \code{from} and
#'   \code{to}. NULL arguments are skipped, so \code{getMarkers(x)} will return
#'   \code{seq_len(x$nMark)} (i.e. all markers).
#'
#' @seealso \code{\link{linkdat}}
#'
#' @examples
#'
#' x = linkdat(toyped)
#' x = removeMarkers(x, 1) # removing the only marker.
#' x
#'
#' # Creating and adding a SNP marker with alleles 'a' and 'b', for which
#' # individual 1 is heterozygous, individuals 2 and 4 are homozygous for the
#' # 'b' allele, and individual 3 has a missing genotype.
#' m1 = marker(x, 1, c('a','b'), c(2,4), c('b','b'))
#' x = addMarker(x, m1)
#'
#' # A rare SNP for which both children are heterozygous.
#' # The 'alleles' argument can be skipped, but is recommended to ensure
#' # correct order of the frequencies.
#' m2 = marker(x, 3:4, 1:2, alleles=1:2, afreq=c(0.99, 0.01))
#' x = addMarker(x, m2)
#'
#' # Modifying the second marker:
#' # Making it triallelic, and adding a genotype to the father.
#' x = modifyMarker(x, marker=2, alleles=1:3, ids=1, genotype=2:3)
#'
#' # Adding an empty SNP (all genotypes are missing):
#' x = addMarker(x, 0, alleles=c('A', 'B'))
#'
#' # Similar shortcut for creating a marker for which all
#' # pedigree members are homozygous for an allele (say 'b'):
#' x = addMarker(x, 'b')
#' # Alternative: m = marker(x, 'b'); addMarker(x, m)
#'
#'
#' @name markers
NULL

#' @rdname markers
#' @export
marker = function(x, ..., allelematrix, alleles = NULL, afreq = NULL, missing = 0, chrom = NA,
    pos = NA, name = NA, mutmat = NULL) {
    arglist = list(...)
    n = length(arglist)
    if (n == 0) {
        if (missing(allelematrix))
            m = matrix(missing, ncol = 2, nrow = x$nInd) else {
            m = as.matrix(allelematrix)
            stopifnot(nrow(m) == x$nInd, ncol(m) == 2)
        }
    }
    if (n > 0) {
        if (!missing(allelematrix))
            stop("Individual genotypes ar not allowed when 'allelematrix' is given.")
        if (n == 1 && length(arglist[[1]]) > 2)
            stop("Syntax error. See ?marker.")
        if (n > 1 && n%%2 != 0)
            stop("Wrong number of arguments.")

        fill = {
            if (n == 1)
                arglist[[1]] else missing
        }
        m = matrix(fill, ncol = 2, nrow = x$nInd, byrow = TRUE)  #create marker matrix with all individuals equal.

        for (i in (seq_len(n/2) * 2 - 1)) {
            # odd numbers up to n-1.
            ids = arglist[[i]]
            geno = arglist[[i + 1]]
            for (j in match(ids, x$orig.ids)) m[j, ] = geno
        }
    }

    .createMarkerObject(m, missing = missing, alleles = alleles, afreq = afreq, chrom = chrom,
        pos = pos, name = name, mutmat = mutmat)
}

#' @rdname markers
#' @export
addMarker = function(x, m, ...) {
    if (is.matrix(m) || is.data.frame(m))
        stopifnot(nrow(m) == x$nInd, ncol(m) == 2)
    if (inherits(m, "marker"))
        m = list(m)
    if (is.list(m) && all(sapply(m, inherits, what = "marker")))
        return(setMarkers(x, structure(c(x$markerdata, m), class = "markerdata")))
    if (!is.list(m) && length(m) == 1)
        m = matrix(m, ncol = 2, nrow = x$nInd)  #gives a nice way of setting an empty or everywhere-homozygous marker, e.g.: x=addMarker(x,0)
    mm = .createMarkerObject(m, ...)
    setMarkers(x, structure(c(x$markerdata, list(mm)), class = "markerdata"))
}

#' @rdname markers
#' @export
setMarkers = function(x, m, annotations = NULL, missing = 0) {
    if (is.null(m))
        markerdata_list = NULL
    else if (inherits(m, "marker"))
        markerdata_list = structure(list(m), class = "markerdata")
    else if (is.list(m) && all(sapply(m, inherits, what = "marker")))
        markerdata_list = structure(m, class = "markerdata")
    else if (inherits(m, "markerdata"))
        markerdata_list = m
    else if ((n <- ncol(m <- as.matrix(m))) == 0)
        markerdata_list = NULL
    else {
        if (is.character(m[1, 1]) && ("/" %in% strsplit(m[1, 1], "")[[1]])) {
            # if alleles are merged to 1 genotype per column
            splitvec = unlist(strsplit(m, "/", fixed = T))
            nrows = nrow(m)
            msplit = matrix(missing, ncol = 2 * n, nrow = nrows)
            msplit[, 2 * seq_len(n) - 1] = splitvec[2 * seq_len(n * nrows) - 1]
            msplit[, 2 * seq_len(n)] = splitvec[2 * seq_len(n * nrows)]
            m = msplit
        }
        if (ncol(m)%%2 != 0)
            stop("Uneven number of marker allele columns")
        nMark = ncol(m)/2
        if (!is.null(annotations)) {
            if (length(annotations) == 2 && !is.null(names(annotations)))
                annotations = rep(list(annotations), nMark)  # if given attrs for a single marker
            else if (length(annotations) != nMark)
                stop("Length of marker annotation list does not equal number of markers.")

            markerdata_list = lapply(1:nMark, function(i) {
                if (is.null(attribs <- annotations[[i]]))
                  return(NULL)
                mi = m[, c(2 * i - 1, 2 * i), drop = FALSE]
                do.call(.createMarkerObject, c(list(matr = mi, missing = missing), attribs))
            })
        } else {
            markerdata_list = lapply(1:nMark, function(i) {
                mi = m[, c(2 * i - 1, 2 * i), drop = FALSE]
                .createMarkerObject(mi, missing = missing)
            })
        }
        markerdata_list[sapply(markerdata_list, is.null)] = NULL
        class(markerdata_list) = "markerdata"
    }
    x$nMark = length(markerdata_list)
    x$markerdata = markerdata_list
    x$available = if (x$nMark > 0) x$orig.ids[rowSums(do.call(cbind, markerdata_list)) > 0] else numeric(0)
    x
}

.createMarkerObject = function(matr, name = NA, chrom = NA, pos = NA, alleles = NULL, afreq = NULL,
    mutmat = NULL, missing = 0) {
    if (is.null(alleles)) {
        vec = as.vector(matr)
        alleles = unique.default(vec[vec != missing])
        if (length(alleles) == 0)
            alleles = 1
    }
    if (!is.numeric(alleles) && !any(grepl("[^0-9\\.]", alleles)))
        alleles = as.numeric(alleles)
    all_ord = order(alleles)
    alleles = alleles[all_ord]
    nalleles = length(alleles)
    if (is.null(afreq))
        afreq = rep.int(1, nalleles)/nalleles else {
        if (length(afreq) != nalleles)
            stop("Number of alleles don't match length of frequency vector")
        if (round(sum(afreq), 2) != 1)
            warning(paste("Allele frequencies for marker", name, " do not sum to 1:", paste(afreq,
                collapse = ", ")))
        afreq = afreq[all_ord]
    }
    if (!is.null(mutmat)) {
        stopifnot(is.list(mutmat) || is.matrix(mutmat))
        # If single matrix given: make sex specific list
        if (is.matrix(mutmat)) {
            mutmat = .checkMutationMatrix(mutmat, alleles)
            mutmat = list(male = mutmat, female = mutmat)
        } else {
            stopifnot(length(mutmat) == 2, setequal(names(mutmat), c("female", "male")))
            mutmat$female = .checkMutationMatrix(mutmat$female, alleles, label = "female")
            mutmat$male = .checkMutationMatrix(mutmat$male, alleles, label = "male")
        }
    }
    m_obj = match(matr, alleles, nomatch = 0)
    attributes(m_obj) = list(dim = dim(matr), name = name, chrom = chrom, pos = pos, nalleles = nalleles,
        alleles = as.character(alleles), afreq = afreq, mutmat = mutmat, missing = missing,
        class = "marker")
    m_obj
}

.checkMutationMatrix = function(mutmat, alleles, label = "") {
    # Check that mutation matrix is compatible with allele number / names.  Sort matrix
    # according to the given allele order (this is important since dimnames are NOT used in
    # calculations).
    N = length(alleles)
    if (label != "")
        label = sprintf("%s ", label)
    if (any((dm <- dim(mutmat)) != N))
        stop(sprintf("Dimension of %smutation matrix (%d x %d) inconsistent with number of alleles (%d).",
            label, dm[1], dm[2], N))
    if (any(round(.rowSums(mutmat, N, N), 3) != 1))
        stop(sprintf("Row sums of %smutation matrix are not 1.", label))
    alleles.char = as.character(alleles)
    if (!setequal(rownames(mutmat), alleles) || !setequal(colnames(mutmat), alleles))
        stop(sprintf("Dimnames of %smutation do not match allele names.", label))
    m = mutmat[alleles.char, alleles.char]

    # lumbability: always lumpable (any alleles) if rows are identical (except diagonal)
    attr(m, "lumpability") = NA
    if (N > 2) {
        if (all(vapply(1:N, function(i) diff(range(m[-i, i])) == 0, logical(1))))
            attr(m, "lumpability") = "always"  # If each column has 0 range
    }
    m
}


.prettyMarkers = function(m, alleles = NULL, sep = "", missing = NULL, singleCol = FALSE, sex) {
    if (is.null(m))
        return(m)
    if (is.matrix(m))
        m = list(m)
    if ((n <- length(m)) == 0)
        return(m)
    if (is.null(alleles))
        alleles = lapply(m, attr, "alleles") else {
        if (!is.atomic(alleles))
            stop("The parameter 'alleles' must be NULL, or an atomic vector.")
        if (length(alleles) < max(unlist(lapply(m, attr, "nalleles"))))
            stop("The indicated 'alleles' vector has too few alleles for some markers.")
        alleles = rep(list(alleles), n)
    }
    if (is.null(missing))
        missing = unlist(lapply(m, attr, "missing")) else {
        if (!is.atomic(missing) || length(missing) != 1)
            stop("The parameter 'mising' must be NULL, or a numeric/character of length 1.")
        missing = rep(missing, n)
    }
    mNames = unlist(lapply(m, attr, "name"))
    mNames[is.na(mNames)] = ""
    pretty_m = do.call(c, lapply(seq_len(n), function(i) c(missing[i], alleles[[i]])[m[[i]] +
        1]))
    dim(pretty_m) = c(length(pretty_m)/(2 * n), 2 * n)
    if (singleCol) {
        al1 = pretty_m[, 2 * seq_len(n) - 1, drop = FALSE]
        al2 = pretty_m[, 2 * seq_len(n), drop = FALSE]
        m.matrix = matrix(paste(al1, al2, sep = sep), ncol = n)
        chrom_X = unlist(lapply(m, function(mm) identical(23L, as.integer(attr(mm, "chrom")))))
        if (any(chrom_X)) {
            males = (sex == 1)
            if (!all(hh <- al1[males, chrom_X] == al2[males, chrom_X]))
                warning("Male heterozygosity at X-linked marker detected.")
            m.matrix[males, chrom_X] = al1[males, chrom_X]
        }
        colnames(m.matrix) = mNames
        return(m.matrix)
    } else {
        nam = rep(mNames, each = 2)
        nam[nzchar(nam)] = paste(nam[nzchar(nam)], 1:2, sep = "_")
        colnames(pretty_m) = nam
        return(pretty_m)
    }
}

#' @rdname markers
#' @export
modifyMarker = function(x, marker, ids, genotype, alleles, afreq, chrom, name, pos) {
    if (inherits(marker, "marker")) {
        if (nrow(marker) != x$nInd)
            stop("Wrong dimensions of marker matrix.")
        m = marker
    } else {
        if (!is.numeric(marker) || length(marker) != 1)
            stop("Argument 'marker' must be a single integer or an object of class 'marker'.")
        if (marker > x$nMark)
            stop("Indicated marker does not exist.")
        m = x$markerdata[[marker]]
    }
    mis = attr(m, "missing")

    if (!missing(alleles)) {
        stopifnot(is.atomic(alleles), is.numeric(alleles) || is.character(alleles))
        # if(is.numeric(alleles)) alleles = as.character(alleles)
        if (attr(m, "missing") %in% alleles)
            stop("The 'missing allele' character cannot be one of the alleles.")
        lena = length(alleles)
        if (lena == attr(m, "nalleles"))
            attr(m, "alleles") = as.character(alleles) else {
            num_als = unique.default(as.vector(m[m != 0]))
            if (lena < length(num_als))
                stop("Too few alleles.")
            pm = matrix(c(0, attr(m, "alleles"))[m + 1], ncol = 2)
            m = .createMarkerObject(pm, missing = 0, alleles = alleles, afreq = rep(1, lena)/lena,
                chrom = attr(m, "chrom"), pos = attr(m, "pos"), name = attr(m, "name"), mutmat = attr(m,
                  "mutmat"))
        }
    }
    if (!missing(afreq)) {
        if (round(sum(afreq), 2) != 1)
            stop("The allele frequencies don't sum to 1.")
        if (length(afreq) != attr(m, "nalleles"))
            stop("The length of allele frequency vector doesn't equal the number of alleles.")
        if (is.null(names(afreq)))
            attr(m, "afreq") = afreq else if (all(names(afreq) %in% attr(m, "alleles")))
            attr(m, "afreq") = afreq[attr(m, "alleles")] else stop("The names of the frequency vector don't match the allele names.")
    }

    changegeno = sum(!missing(ids), !missing(genotype))
    if (changegeno == 1)
        stop("The parameters 'ids' and 'genotype' must either both be NULL or both non-NULL.")
    if (changegeno == 2) {
        if (!is.atomic(genotype) || length(genotype) > 2)
            stop("The 'genotype' parameter must be a numeric or character vector of length 1 or 2.")
        pm = .prettyMarkers(list(m))
        for (i in .internalID(x, ids)) pm[i, ] = genotype
        if (!all(as.vector(pm) %in% c(attr(m, "alleles"), mis)))
            stop("Unknown allele(s). Please specify allele names using the 'alleles' argument.")
        attribs = attributes(m)
        m = match(pm, attr(m, "alleles"), nomatch = 0)
        attributes(m) = attribs
    }
    if (!missing(chrom))
        attr(m, "chrom") = chrom
    if (!missing(name))
        attr(m, "name") = name
    if (!missing(pos))
        attr(m, "pos") = pos
    if (inherits(marker, "marker"))
        return(m) else {
        x$markerdata[[marker]] = m
        x
    }
}


#' @rdname markers
#' @export
getMarkers = function(x, markernames = NULL, chroms = NULL, fromPos = NULL, toPos = NULL) {
    mnos = seq_len(x$nMark)
    if (!is.null(markernames)) {
        if (length(markernames) == 0)
            return(numeric(0))
        mnos = mnos[match(markernames, unlist(lapply(x$markerdata, function(m) attr(m, "name"))),
            nomatch = 0)]
    }
    if (!is.null(chroms)) {
        if (length(chroms) == 0)
            return(numeric(0))
        mnos = mnos[unlist(lapply(x$markerdata[mnos], function(m) attr(m, "chrom"))) %in% chroms]
    }
    if (!is.null(fromPos))
        mnos = mnos[unlist(lapply(x$markerdata[mnos], function(m) attr(m, "pos"))) >= fromPos]
    if (!is.null(toPos))
        mnos = mnos[unlist(lapply(x$markerdata[mnos], function(m) attr(m, "pos"))) <= toPos]
    mnos
}

#' @rdname markers
#' @export

removeMarkers = function(x, markers = NULL, markernames = NULL, chroms = NULL, fromPos = NULL,
    toPos = NULL) {
    if (is.null(markers))
        markers = getMarkers(x, markernames, chroms, fromPos, toPos)
    if (is.null(markers) || length(markers) == 0)
        return(x)
    m = x$markerdata
    m[markers] = NULL
    setMarkers(x, m)
}

#' @rdname markers
#' @export
swapGenotypes = function(x, ids) {
    assert_that(length(ids) == 2)
    ids = .internalID(x, ids)
    y = as.matrix(x)
    y[ids, -(1:6)] = y[ids[2:1], -(1:6)]
    restore_linkdat(y)
}

#TODO!
#print.marker = function(m) {
#    cat(sprintf("Marker name: %s\n", attr(m, 'name')))
#    chrom = attr(m, 'chrom')
#    pos = attr(m, 'pos')
#    if(!is.na(chrom)) pos = paste(chrom, pos, sep=" - ")
#    cat(sprintf("Position: %s\n", pos))
#    cat("Alleles and frequencies:\n")
#    afreq = attr(m, 'afreq')
#    names(afreq) = attr(m, 'allele')
#    print(afreq)
#}

#' @rdname markers
#' @export
modifyMarkerMatrix = function(x, ids, new.alleles) {
    ids = .internalID(x, ids)
    y = as.matrix(x)
    y[ids, -(1:6)] = new.alleles
    restore_linkdat(y)
}

.setSNPfreqs = function(x, newfreqs) {
    stopifnot(all(vapply(x$markerdata, function(m) attr(m, "nalleles"), numeric(1)) == 2))
    newfreqs = rep(newfreqs, length = x$nMark)
    for (i in seq_len(x$nMark)) attr(x$markerdata[[i]], "afreq") = c(newfreqs[i], 1 - newfreqs[i])
    x
}

Try the paramlink package in your browser

Any scripts or data that you put into this service are public.

paramlink documentation built on April 15, 2022, 9:06 a.m.