Nothing
## Functions: pedgene
## wrapper for computing retrospective likelihood stat on pedigrees for rare
## variants over multiple genes
## Authors: Jason Sinnwell, Dan Schaid, and Alessia Visconti
pedgene <- function(ped, geno, map=NULL, male.dose=2, checkpeds=TRUE, verbose.return=FALSE,
weights=NULL, weights.beta=c(1,25), weights.mb=FALSE, relation=NULL,
method="kounen", acc.davies=1e-5) {
##Arguments:
##
## Required:
## ped: data.frame with columns needed to create the pedigree
## geno: data.frame with ped, person ids in the first two columns, and numeric columns
## with minor allele count (0/1/2) for markers (columns) and subjects (rows) in
## the ped object
##
## Optional:## map: data.frame with columns chrom, position, and gene.
## gene can be gene symbol or geneid (entrez or ensemble);
## it is not matched with other annotation, just used for marker groups
## If not passed, assume all variants from the same gene
## male.dose: When doing X-chrom, define how male genotypes should be
## analyzed. male.dose can be between 0 and 2, but usually 1 or 2
## checkpeds, perform basic pedigree structure checks. Time-consuming if peds are
## are already validated
## method: either Kounen or Davies method to calculate kernel test p-values
## verbose.return: similar to glm in R, return geno and response data that is used in calculations
## weights: allow user-specified weights. If Null, use beta distribution weighting
## weights.mb: if weights=NULL and weights.MB=TRUE, do Madsen-Browning weights
## weights.beta: beta distribution coefficients used for weighting.
## By default, beta weights are used
## acc.davies: numerical accuracy for davies method to choose eigven values
## and to determine p-value
## Steps
## 1) verify ped columns, map and geno match dimensions
## 2) Create kinship matrices for autosomes and X for all subjects
## 3) run pedgene.stats on each gene given in map
verbose=FALSE
## save the call
call <- match.call()
## save options before setting stringsAsFactors for just this function
saveOpt <- options()
options(stringsAsFactors=FALSE)
## check method, must be davies or kounen
method=casefold(method[1])
method=c("kounen","davies")[pmatch(method, c("kounen", "davies"))]
if(is.na(method)) {
warning("method not kounen or davies, setting to kounen\n")
method="kounen"
}
## require kinship function to be recent
# kin2v <- sessionInfo()$otherPkgs$kinship2$Version
# if(is.null(kin2v)) {
# kin2v <- sessionInfo()$loadedOnly$kinship2$Version
# }
# if(as.numeric(substring(kin2v, 1, nchar(kin2v)-2)) < 1.5) {
# stop("kinship2 needs to be version 1.5.3 or later\n")
# }
## if no map, create one, assuming all one gene
if(is.null(map)) {
map <- data.frame(chrom=rep("unknown", ncol(geno)-2),
gene=rep("unknown", ncol(geno)-2))
}
## unify names of ped and map to lowercase
names(map) <- casefold(names(map))
names(ped) <- casefold(names(ped))
## old requirement was to have ped column, change internally to famid
names(ped) <- gsub("ped", "famid", names(ped))
names(geno) <- gsub("ped", "famid", names(geno))
## verify map data.frame ###
## i. check column names
if(any(!(c("chrom", "gene") %in% names(map)))) {
stop("map requires columns for chrom and gene")
}
## ii. recode chrom 23 to X
map$chrom[map$chrom==23] <- "X"
## verify geno matrix, and that it matches map
if(any(!(c("famid", "person") %in% names(geno)))) {
stop("geno requires columns 'famid' and 'person' ids")
}
## get indices of ped/person of geno to match ped, then strip off those columns
keepped <- match(paste(geno$famid, geno$person, sep="-"),
paste(ped$famid, ped$person, sep="-"))
tblkeep <- table(keepped)
## check for multiple subject entries and not matching a subject
if(any(tblkeep > 1)) {
warning(paste("subject with multiple entries, only the first is used: ", names(tblkeep)[which(tblkeep>1)], ".\n", sep=""))
geno <- geno[!duplicated(paste(geno$famid, geno$person, sep="-")),,drop=FALSE]
keepped <- match(paste(geno$famid, geno$person, sep="-"),
paste(ped$famid, ped$person, sep="-"))
}
if(any(is.na(keepped))) {
warning("removing subject in genotype matrix who is not in pedigree \n")
geno <- geno[!is.na(keepped),,drop=FALSE]
keepped <- match(paste(geno$famid, geno$person, sep="-"),
paste(ped$famid, ped$person, sep="-"))
}
## after matching, get rid of id cols
geno <- geno[,!(names(geno) %in% c("famid", "person")),drop=FALSE]
## rm subjects in geno who are not in ped
if(nrow(map) != (ncol(geno))) {
stop(paste("map rows (", nrow(map), ") and geno columns (", ncol(geno),
") do not match \n",sep=""))
}
## Check that geno for males on X should only have 0 and 1 dosages
xidx <- which(map$chrom=="X" | map$chrom=="x")
if(length(xidx)) {
xdosemale <- geno[ped$sex[keepped]==1,xidx, drop=TRUE]
if(sum(xdosemale>1, na.rm=TRUE)) {
stop("All male dose on X chromosome should be <= 1")
}
}
## verify ped data.frame has expected column names
if(any(!(c("famid", "person", "father", "mother", "sex", "trait")
%in% names(ped)))) {
stop("Error: ped requires columns: famid, person, father, mother, sex, trait")
}
#############################################################################
## this is where to do trait.adjusted if we want it on all people that have trait
## this caused different results when flipping major/minor alleles, so moved later
#############################################################################
## check weights parameters
## verify user-passed weights, match ncol(geno)
if(!is.null(weights)) {
## by default, do Beta weights, implemented in ped.gene.stats
## otherwise, these are user-specified, check length
if(length(weights) != ncol(geno)) {
stop(paste("Error: should have weights(", length(weights),
") for every variant position(", ncol(geno), ")", sep=""))
}
} else { ## no user-given weights
if(weights.mb==FALSE) {
## verify weights.beta
if(length(weights.beta) != 2 | any(weights.beta < 0)) {
warning("weights.beta should be two positive numbers, setting to (1,25)\n")
weights.beta=c(1,25)
}
} ## m-b weights, nothing to check except that weights.mb is true/false
}
## perform simple pedigree checks
if(checkpeds) {
uped <- unique(ped$famid)
nped <- length(uped)
for(i in 1:nped) {
iped <- uped[i]
temp.ped <- ped[ped$famid == iped,, drop=FALSE]
if(nrow(temp.ped) > 1) {
## simple checks on pedigree
pedigreeChecks(temp.ped, male.code=1, female.code=2)
}
}
}
## additional checks <could> be done on peds when creating pedlist object,
## which could be used to create kinmat.
# pedall <- with(ped, kinship2::pedigree(id=person, dadid=father, momid=mother,
# sex=sex, famid=ped, missid=missid))
# kinmat <- kinship2::kinship(pedall, chrtype="auto")
## We rather created it directly from ped
## create kinship matrix, also for X if any genes on X chrom
## subset to only those with geno rows
if(missing(relation)) {
ped2 <- with(ped, pedigree(famid=famid, id=person, dadid=father, momid=mother,
sex=sex, missid=0))
} else {
ped2 <- with(ped, pedigree(famid=famid, id=person, dadid=father, momid=mother,
sex=sex, missid=0, relation=relation))
}
kinmat <- Matrix(kinship(ped2, chrtype="autosome"))
kinmat <- kinmat[keepped, keepped]
if(any(map$chrom=="X")) {
kinmatX <- Matrix(kinship(ped2, chrtype="X"))
kinmatX <- kinmatX[keepped, keepped]
} else {
kinmatX <- NULL
}
ped <- ped[keepped,]
## subset pedgeno kinmat, kinmatX to only subject who have genotype data
missidx <- is.na(ped$trait) | apply(is.na(geno), 1, all)
if("trait.adjusted" %in% names(ped)) missidx <- missidx | is.na(ped$trait.adjusted)
if(sum(missidx)>0) {
ped <- ped[!missidx,]
kinmat <- kinmat[!missidx, !missidx]
kinmatX <- kinmatX[!missidx, !missidx]
geno <- geno[!missidx,,drop=FALSE]
}
## this is where trait.adjusted should be calculated if its on everyone with genotype data
## add trait.adjusted if not already there
if(!("trait.adjusted" %in% names(ped))) {
ped$trait.adjusted <- mean(ped$trait, na.rm=TRUE)
}
gvec <- chromvec <- nvariant <- noninform <- kstat <- kpval <- bstat <- bpval <- NULL
for(g in unique(map$gene)) {
if(verbose) {
cat("test on gene ", g, "\n")
}
gidx <- which(map$gene==g)
## drop=FALSE for 1-marker gene
genosub <- geno[,gidx,drop=FALSE]
resid <- ped$trait - ped$trait.adjusted
sex <- ped$sex
chrom <- map$chrom[gidx[1]]
c.factor <- quadfactor(
if(chrom=="X") kinmatX else kinmat,
chrom, resid, sex, male.dose)
pgstat <- pedgene.stats(genosub, as.vector(c.factor), map$chrom[gidx[1]], male.dose, sex, resid,
weights=weights[gidx], weights.beta=weights.beta, weights.mb=weights.mb,
method=method, acc.davies=acc.davies)
if(pgstat$nvariant==0) {
cat("gene: '", g, "' has no markers after removing markers with all same genotype\n")
}
gvec <- c(gvec, g)
chromvec <- c(chromvec, chrom)
nvariant <- c(nvariant,pgstat$nvariant)
noninform <- c(noninform, pgstat$noninform)
kstat <- c(kstat, pgstat$stat.kernel)
kpval <- c(kpval, pgstat$pval.kernel)
bstat <- c(bstat, pgstat$stat.burden)
bpval <- c(bpval, pgstat$pval.burden)
}
pgdf <- data.frame(gene=gvec, chrom=chromvec, n.variant=nvariant,
n.noninform=noninform,
stat.kernel=kstat, pval.kernel=kpval,
stat.burden=bstat, pval.burden=bpval)
# re-set options
options(saveOpt)
if(verbose.return) {
save <- list(geno=geno, ped=ped, map=map)
} else {
save=NULL
}
pglist <- list(pgdf=pgdf, call=call, save=save)
class(pglist) <- "pedgene"
return(pglist)
}
## print and summary methods for pedgene S3 class
print.pedgene <- function(x, ...) {
## suggest digits=4
print.data.frame(x$pgdf, ...)
invisible()
}
summary.pedgene <- function(object, ...) {
## suggest digits=4 or 5
cat("\nSummary for pedgene object: \n\n")
cat("Call:\n")
print(object$call)
cat("\n\n")
## invoke print method
print(object, ...)
invisible()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.