# CODE: GWAS_meat_growth_traits Creation date:06/26/2015 Last modification:06/26/2015 Authors: DV SC JPS
#' efficiently calculates the diagonal of \code{x*y*t(x)}.
#' @title Efficient computation of diagonals of matrix products
#' @param x A matrix of size n by m
#' @param y A symetric matrix of size m by m
#' @return A vector of length n
#' @export
diagprod <- function(x, y) {
# by using this function the user will get the diagonal elements of X Y X' Check points If x and y are matrices
if (!(is.matrix(x)))
stop("x must be a matrix")
if (!(is.matrix(y)))
stop("y must be a matrix")
# Rows in x are the same as columns in y
if (ncol(x) != nrow(y))
stop("Number of rows in x must be equal to the number of columns in y")
# y is a square matrix
if (nrow(y) != ncol(y))
stop("y must be a square matrix")
# ck the names if(colnames(x)!=rownames(y)) stop ('x and y must have the same order')
diag_prod <- rowSums((x %*% y) * x)
return(diag_prod)
}
#' A frequency table of number of significant tests at given thresholds
#' @title Significance summary for a \code{\link{gwas}} object
#' @param gwas an object of the class gwas generated by \code{\link{gwas}}
#' @param pvalue a vector with the p-value thresholds to build frequency table
#' @param correction a string with value 'bonf' if compute bonferroni correction is needed
#' @return a frequency table with the number of SNP significant at the thresholds specified by pvalue
#' @export
summary.gwas <- function(gwas, pvalue = c(0.001, 0.01, 0.025, 0.05, 0.1), correction = "bonf") {
if (is.null(gwas))
stop("missing GWAS object")
if (all(class(gwas) != "gwas"))
stop("object must be of class gwas")
if (min(pvalue) < 0 || max(pvalue) > 1)
stop("range of pvalue must be between 0 and 1")
if (correction == "bonf") {
# Get the value that will be the threshold for those p values
threshold <- qnorm(-pvalue/(2 * nrow(gwas)) + 1)
} else {
threshold <- qnorm(-pvalue/2 + 1)
warning("no multiple test correction")
}
names(threshold) <- sub(0, replacement = "<0", pvalue)
# Crate Standarization of the values (standarized_values)
standarized_values <- gwas[["ghat"]]/sqrt(gwas[["var_ghat"]])
# Compare those Standarized effects with the threshold, check which ones are bigger
signif_values <- t(as.matrix(rowSums(sapply(abs(standarized_values), ">=", threshold))))
rownames(signif_values) <- "signif.values"
# change the name in case we want to use a print function
class(signif_values) <- "summary.gwas"
return(signif_values)
}
#' A function to create q-qplots of pvalues from high dimensional test (e.g: GWA, Differential expression analyses).
#' @title q-qplots of p-values
#' @param pvector A vector of pvalues, assumed to follow a uniform distribution under the null hypothesis
#' @param add a logical value, T: adds to existing graphic or F: creates a new graphic
#' @param ... additional parameters to parse to plot function
#' @return NULL. A graphic will be generated
#' @export
qqgplot = function(pvector, add = F, ...) {
o = -log10(sort(pvector, decreasing = F))
e = -log10(1:length(o)/length(o))
if (!add) {
plot(e, o, ..., xlab = expression(Expected ~ ~-log[10](italic(p))), ylab = expression(Observed ~ ~-log[10](italic(p))),
xlim = c(0, max(e)), ylim = c(0, max(o) + 1))
} else {
points(e, o, ...)
}
lines(e, e, col = "red")
}
#' A function to create manhattan plots from GWA and QTL mapping analyses.
#' @title Manhattan plots of p-values
#' @param pvalues A vector of pvalues with SNP, marker or position names
#' @param map a genetic map: a data.frame with SNP or marker names in rows, chromosome name (chr column) and position (pos column)
#' @param threshold the threshold of significance
#' @param col a vector of strings with color names for painting SNP according to their chromosome
#' @param chrom a vector of chromosomal names to be included in the graphic by default all chromosomes are included
#' @param ... additional graphical parameters used by plot
#' @return NULL. A graphic is be generated
#' @export
manhattan_plot<-function (pvalues, map, threshold = 0.01, col = c("black", "red"),
chrom = NULL, ...)
{
if (!("chr" %in% colnames(map)))
stop("chr should be a column of map")
if (sum(rownames(map) == names(pvalues)) < length(pvalues))
stop("make sure that the values are ordered, rownames of map and names of p-value vector should agree")
if ((max(pvalues) > 1) | (min(pvalues) < 0))
stop("pvalues should be in the range [0,1]")
if (sum(pvalues == 0) > 0) {
pvalues[pvalues == 0] <- min(pvalues[pvalues > 0])
warning("some p-values were exactly zero and they were replaced with the next smallest value")
}
if (!is.null(chrom)) {
idx <- as.character(map$chr) %in% as.character(chrom)
map <- map[idx, ]
pvalues <- pvalues[idx]
}
if (length(col) < 2)
stop("two colors are needed in the color vector")
lv<-as.character(unique(map$chr))
cl<-col[(1:length(lv))%%2+1]
names(cl)<-lv
plot(-log(pvalues, 10), pch = 20, col = cl[as.character(map$chr)],
ylab = "-log10-pvalue", xlab = "chr", ..., axes = F)
axis(2)
lns <- (by(map, map$chr, nrow))
lns<-lns[lv]
axis(1, at = c(0, cumsum(lns)[-length(lns)]) + as.vector(lns/2),
labels = lv)
box()
abline(h = -log(threshold, 10), lwd = 2, col = "green")
}
#' Computes pvalues from a GWA class object represents them graphically. using \code{\link{manhattan_plot}} and \code{\link{qqgplot}}
#' @title Significance plots and pvalues from \code{\link{gwas}} output
#' @param gwas an object of the class gwas generated by \code{\link{gwas}}
#' @param gpdata an object of class gpdata used to obtain a SNP map
#' @param correction a string with value 'bonf' if compute bonferroni correction is needed
#' @param plotlog10 a logical value indicating if a manhattan plot is requested
#' @param q.qplot a logical value indicating if a q-qplot is requested
#' @param pvalue significance threshold
#' @param ...
#' @return a vector of pvalues with names equal to marker names. Optional output: Manhattan plot and q-qplot of pvalues
#' @export
plot.gwas <- function(gwas, correction = "bonf", gpdata = NULL, plotlog10 = FALSE, pvalue = 0.05, q.qplot = FALSE, chrom = NULL,
...) {
# get the significance level
if (correction == "bonf") {
# Get the value that will be the threshold for those p values
threshold <- pvalue/nrow(gwas)
} else {
threshold <- pvalue
warning("no multiple test correction")
}
map <- na.omit(gpdata$map)
idx <- match(rownames(gwas), rownames(map))
if (any(is.na(idx)))
warning("some markers in the gwa object are not present in the map")
map <- map[idx, c("chr", "pos")]
pval <- getpvalue(gwas, log.p = F)
if (q.qplot == TRUE) {
qqgplot(pvector = pval, main = NULL, add = F, ...)
}
if (plotlog10 == TRUE) {
manhattan_plot(map = map, pvalues = pval, threshold = threshold, col = c("black", "red"), chrom = chrom, ...)
}
}
#' Pvalue for blup objects
#' @title pvalue computation for a \code{\link{gwas}} object
#' @param gwas an object of the class gwas generated by \code{\link{gwas}}
#' @param log.p a logical value to indicate if log10-pvalues are requested
#' @param is.z a logical value to indicate if z-scores are provided instead of estimate of snp effects and their variances
#' @return a vector of p-values or log10 p-values
#' @export
getpvalue <- function(gwas, log.p = T, is.z = F) {
if (is.null(gwas))
stop("missing GWAS object")
if (is.z) {
snpej <- gwas
} else {
if (all(class(gwas) != "gwas"))
stop("object must be of class gwas")
# Get the standarized values of the SNP
snpej <- gwas[["ghat"]]/sqrt(gwas[["var_ghat"]])
names(snpej) <- rownames(gwas)
}
# Calculate the p value
pval <- (2 - 3 * log.p) * pnorm(abs(snpej), lower.tail = F, log.p = log.p)/(log(10)^log.p) - log.p * log10(2)
return(pval)
}
#' A function used internally to compute Confidence Intervals of GWA peaks
#' @title Not intended for direct use. See \code{\link{IC_gwa}} for details
#' @param gbl a gblup object generated by \code{\link{gblup}}
#' @param Z a matrix of standardized genotypes
#' @param map a map dataset for the SNP in Z
#' @return the position of two peaks
#' @export
IC_base<-function(gbl,Z=NULL,map=NULL){
fenames<-all.vars(gbl$model$formula)
wt=NULL
rnames<-all.vars(gbl$model$Vformula)
if("wt"%in%rnames){
wt<-diag(gbl$model$wt)
names(wt)<-colnames(gbl$model$G)
}
rnames<-rnames[!rnames%in%c("G","wt","In")]
if(length(all.vars(update(gbl$model$Vformula,.~.-G-In-wt)))==1){
design=gbl$model$formula
} else{
design=c(gbl$model$formula,update(gbl$model$Vformula,.~.-G-In-wt))
}
rmname<-rnames[unlist(sapply(gbl$model[rnames],is.matrix))]
rvname<-rnames[unlist(!sapply(gbl$model[rnames],is.matrix))]
dts<-as.data.frame(gbl$model[names(gbl$model)%in%c(fenames,rvname)])
vdata<-gbl$model[names(gbl$model)%in%rmname]
lns<-nrow(dts)
id<-sample(x = c(T,F),lns,replace = T)
dt1<-dts[id,,drop=F]
dt2<-dts[!id,,drop=F]
nrw1<-gblup("y",data=dt1,design=design,G=gbl$model$G,vdata=vdata,wt=wt,pos=gbl$pos,start=gbl$sigma)
nrw2<-gblup("y",data=dt2,design=design,G=gbl$model$G,vdata=vdata,wt=wt,pos=gbl$pos,start=gbl$sigma)
t1<-gwas(nrw1,t(Z))
p1<-which.max(abs(t1[,1]/sqrt(t1[,2])))
t2<-gwas(nrw2,t(Z))
p2<-which.max(abs(t2[,1]/sqrt(t2[,2])))
d<-sort(c(map$pos[p1],map$pos[p2]))
return(d)
}
#' A function usedto compute Confidence Intervals of GWA peaks
#' @title Computes confidence interval of GWA peak
#' @param gbl a gblup object generated by \code{\link{gblup}}
#' @param Z a matrix of standardized genotypes
#' @param map a map dataset for the SNP in Z
#' @param rng a range of positions for which to compute the CI: a vector with three elements: chr, start, end
#' @param peak the position of the GWA peak should be within the range
#' @param alpha the confidence level for the confidence interval
#' @param iter the number of iterations
#' @param NP a logical value to compute non-parametric estimate
#' @return a data.frame with 3 rows: first row is corresponds to the lower limit, the central row corresponds to the peak and the 3rd row is the lower limit. the columns correspond to: a) name of closest snp, b) SNP chromosome, c) SNP position, D) exact position
#' @export
IC_gwa <- function(gbl,Z=NULL,map=NULL,rng=NULL,peak=0,alpha=0.99,iter=500,NP=F){
if (!is.null(rng)){
if (sum(rng[1]%in%map$chr)==0){
stop("chromosome in range is not in map")
}
map<-map[map$chr==rng[1],]
map<-map[(map$pos>=rng[2])&(map$pos<=rng[3]),]
if (nrow(map)<2){
stop("too few rows in your map, please adjust range")
}
if (sum(!rownames(map)%in%colnames(Z))>0){
stop("some SNP in the map not present in Z matrix")
}
Z<-Z[,rownames(map)]
if(class(gbl)!="gblup"){
stop("gbl should be a gblup object")
}
}
if((peak<map$pos[1])|(peak>map$pos[length(map$pos)])){
stop("peak should be within the range of snp in the map")
}
vs<-replicate(iter,IC_base(gbl,Z,map))
if (!NP){
vs<-vs[2,]-vs[1,]
se<-sqrt(sum(vs^2)/(4*length(vs)))
ft<-qnorm(1-(1-alpha)/2)
IC<-c(peak-ft*se,peak+ft*se)} else{
i<- seq(0,1-alpha,0.0001)
ICt<-sapply(i,function(x) c(quantile(vs[1,],x),quantile(vs[2,],0.95-x)))
lng<-ICt[2,]-ICt[1,]
IC<-ICt[,which.min(lng)]
}
closestsnppeak<-which.min(abs(map$pos-peak))
if((IC[1]<rng[2])|(IC[2]>rng[3])){
cat("WARNING: confidence interval boundaries lie outside of the scanned region. It is recommended in increase the range and recompute IC\n")
}
closestsnpll<-which.min(abs(map$pos-IC[1]))
closestsnpul<-which.min(abs(map$pos-IC[2]))
selmap<-map[c(closestsnpll,closestsnppeak,closestsnpul),]
selmap$exact<-c(IC[1],peak,IC[2])
return(selmap)
}
#' A function to find secondary peaks in a GWA
#' @title a function to confirm GWA peaks and to detect secondary peaks within chromosomes
#' @param gbl a gblup object generated by \code{\link{gblup}}
#' @param gwa a gblup object generated by \code{\link{gwa}}
#' @param Z a matrix of standardized genotypes
#' @param map a map dataset for the SNP in Z
#' @param threshold a significance threshold for GWA
#' @param criteria the p-value correction to use
#' @return list with three elements
#' \itemize{
#' \item {\code{gblup}} {the new gblup object with peak SNP as fixed effects}
#' \item{\code{gwas}} {a new gwas objectest to look for peaks}
#' \item{\code{peaks}} {a map with the positions of the included peaks}
#'}
#' @export
find_peaks <- function(gbl,gwa,Z,map,threshold=0.95,criteria=c("qvalue","bonferroni","uncorrected")){
criteria<-criteria[1]
if (!criteria%in%c("qvalue","bonferroni","uncorrected")) {
stop("the criteria should be qvalue, bonferroni or uncorrected")
}
if (class(gbl)!="gblup"){
stop("gbl should be of class gblup")
}
if(sum((rownames(gwa)!=colnames(Z))|((rownames(gwa)!=rownames(map))))>0){
stop("map, gwa object and Z should have matching row/columns")
}
pv<-getpvalue(gwa,log.p = F)
if(criteria=="qvalue"){
pv<-qvalue(pv)$qvalue
}
if(criteria=="bonferroni"){
pv<-1-(1-pv)^length(pv)
}
ns<-sum(pv<threshold)
if(ns==0){
stop("no SNP reach the significance threshold")
}
minv<-tapply(pv,map$chr,function(x) names(x)[which.min(x)])
mins<-as.vector(by(pv,map$chr,min))
bsf<-update(gbl$model$formula,as.formula(paste(".~.+",paste(minv[mins<threshold],sep="",collapse="+"))))
fenames<-all.vars(gbl$model$formula)
wt=NULL
rnames<-all.vars(gbl$model$Vformula)
if("wt"%in%rnames){
wt<-diag(gbl$model$wt)
names(wt)<-colnames(gbl$model$G)
}
rnames<-rnames[!rnames%in%c("G","wt","In")]
if(length(all.vars(update(gbl$model$Vformula,.~.-G-In-wt)))==1){
design=bsf
} else{
design=c(bsf,update(gbl$model$Vformula,.~.-G-In-wt))
}
rmname<-rnames[unlist(sapply(gbl$model[rnames],is.matrix))]
rvname<-rnames[unlist(!sapply(gbl$model[rnames],is.matrix))]
dts<-as.data.frame(gbl$model[names(gbl$model)%in%c(fenames,rvname)])
tobind<-Z[rownames(dts),minv[mins<threshold],drop=F]
colnames(tobind)<-minv[mins<threshold]
dts<-cbind(dts,tobind)
vdata<-gbl$model[names(gbl$model)%in%rmname]
nrw1<-gblup("y",data=dts,design=design,G=gbl$model$G,vdata=vdata,wt=wt,pos=gbl$pos,start=gbl$sigma)
t1<-gwas(nrw1,t(Z))
return(list(gblup=nrw1,gwas=t1,peaks=map[minv[mins<threshold],]))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.