Nothing
##' Pairwise meta-analysis of results from GWA scans
##'
##' Performes meta-analysis of results of two individual GWA studies
##'
##' Original data frames should contain a number of variables,
##' such as allelic coding, code for the effective allele,
##' etc. Please refer to the \code{formetascore} function of the
##' \code{GenABEL} package for details.
##' @param data.x First data frame with GWA data
##' @param data.y Second data frame with GWA data
##' @param name.x First study name
##' @param name.y Second study name
##' @param precorrect Should GC be applied to the original data before pooling
##' @param correct.pooled Whether to apply Genomic Control correction to
##' the study named "POOLED"
##' @return A data frame containing summary statistics and essential details
##' of the individual studies
##' @author Yurii Aulchenko
##' @seealso \code{\link{metagwa.files}}
##' @keywords htest
##' @export
"metagwa.tables" <-
function(data.x, data.y, name.x="P1", name.y="P2",
precorrect=TRUE, correct.pooled=FALSE) {
required <- c("name", "allele1", "allele2", "effallele", "beta", "sebeta")
optional <- c("chromosome", "position", "n", "effallelefreq")
n.x <- names(data.x)
essen <- match(required, n.x)
if (any(is.na(essen))) {
cat("Required fields missing in data.x:",
required[is.na(essen)], "\n")
stop()
}
opt <- match(optional, n.x)
if (any(is.na(essen))) {
cat("Optional fields missing in data.x:",
required[is.na(essen)], "\n")
}
n.y <- names(data.y)
essen <- match(required, n.y)
if (any(is.na(essen))) {
cat("Required fields missing in data.y:",
required[is.na(essen)], "\n");
stop()
}
opt <- match(optional, n.y)
if (any(is.na(essen))) {
cat("Optional fields missing in data.y:",
required[is.na(essen)], "\n");
}
pos.x <- 1
if (is.na(match("position", names(data.x)))) pos.x <- 0
pos.y <- 1
if (is.na(match("position", names(data.y)))) pos.y <- 0
chr.x <- 1
if (is.na(match("chromosome", names(data.x)))) chr.x <- 0
chr.y <- 1
if (is.na(match("chromosome", names(data.y)))) chr.y <- 0
df <- merge(data.x, data.y, by="name", all.x=TRUE, all.y=TRUE)
## Do checks
err1 <- which(df$allele1.x != df$effallele.x &
df$allele2.x != df$effallele.x)
if (length(err1) > 0) {
cat("Effective allele does not correspond to either allele1 or allele2 in",
name.x, "for SNPs\n")
print(df$name[err1])
stop("Stopped")
}
err2 <- which(df$allele1.y != df$effallele.y &
df$allele2.y != df$effallele.y)
if (length(err1) > 0) {
cat("Effective allele does not correspond to either allele1 or allele2 in",
name.x, "for SNPs\n")
print(df$name[err1])
stop("Stopped")
}
gc()
## OK, now check that effective allele is the second one
swall1 <- which(df$allele2.x != df$effallele.x)
if (length(swall1) > 0) {
keep <- df$allele2.x[swall1]
df$allele2.x[swall1] <- df$allele1.x[swall1]
df$allele1.x[swall1] <- keep
}
swall2 <- which(df$allele2.y != df$effallele.y)
if (length(swall2) > 0) {
keep <- df$allele2.y[swall2]
df$allele2.y[swall2] <- df$allele1.y[swall2]
df$allele1.y[swall2] <- keep
}
gc()
## OK, now put TA->AT, GA->AG etc (A>T>G>C)
recX <- which(df$allele1.x=="T" & df$allele2.x=="A")
if (length(recX)>0) df <- swap.x(df, recX)
recX <- which(df$allele1.x=="G" & df$allele2.x=="A")
if (length(recX)>0) df <- swap.x(df, recX)
recX <- which(df$allele1.x=="C" & df$allele2.x=="A")
if (length(recX)>0) df <- swap.x(df, recX)
recX <- which(df$allele1.x=="G" & df$allele2.x=="T")
if (length(recX)>0) df <- swap.x(df, recX)
recX <- which(df$allele1.x=="C" & df$allele2.x=="T")
if (length(recX)>0) df <- swap.x(df, recX)
recX <- which(df$allele1.x=="C" & df$allele2.x=="G")
if (length(recX)>0) df <- swap.x(df, recX)
recX <- which(df$allele1.y=="T" & df$allele2.y=="A")
if (length(recX)>0) df <- swap.y(df, recX)
recX <- which(df$allele1.y=="G" & df$allele2.y=="A")
if (length(recX)>0) df <- swap.y(df, recX)
recX <- which(df$allele1.y=="C" & df$allele2.y=="A")
if (length(recX)>0) df <- swap.y(df, recX)
recX <- which(df$allele1.y=="G" & df$allele2.y=="T")
if (length(recX)>0) df <- swap.y(df, recX)
recX <- which(df$allele1.y=="C" & df$allele2.y=="T")
if (length(recX)>0) df <- swap.y(df, recX)
recX <- which(df$allele1.y=="C" & df$allele2.y=="G")
if (length(recX)>0) df <- swap.y(df, recX)
gc()
## OK, now try to unify coding between studies
swcode <- which(df$effallele.x != df$effallele.y |
df$allele1.x != df$allele1.y |
df$allele2.x != df$allele2.y)
if (length(swcode) > 0) {
df <- recode.y(df, swcode)
recX <- which(df$allele1.y=="T" & df$allele2.y=="A")
if (length(recX)>0) df <- swap.y(df, recX)
recX <- which(df$allele1.y=="G" & df$allele2.y=="A")
if (length(recX)>0) df <- swap.y(df, recX)
recX <- which(df$allele1.y=="C" & df$allele2.y=="A")
if (length(recX)>0) df <- swap.y(df, recX)
recX <- which(df$allele1.y=="G" & df$allele2.y=="T")
if (length(recX)>0) df <- swap.y(df, recX)
recX <- which(df$allele1.y=="C" & df$allele2.y=="T")
if (length(recX)>0) df <- swap.y(df, recX)
recX <- which(df$allele1.y=="C" & df$allele2.y=="G")
if (length(recX)>0) df <- swap.y(df, recX)
}
gc()
## OK, now nullify ambigous coding with wrong strand info
toerX <- which(df$allele1.x == "A" & df$allele2.x == "T" &
(df$strand.x!="+" & df$strand.x!="-" &
df$strand.x!="TOP" & df$strand.x!="BOT"))
if (length(toerX) > 0) {
cat("AT coding without +/-/TOP/BOT strand info in",
name.x, "\n")
cat(length(toerX), "SNPs nullified\n")
df$n.x[toerX] <- NA
df$beta.x[toerX] <- NA
}
gc()
toerX <- which(df$allele1.x == "G" & df$allele2.x == "C" &
(df$strand.x!="+" & df$strand.x!="-" &
df$strand.x!="TOP" & df$strand.x!="BOT"))
if (length(toerX) > 0) {
cat("GC coding without +/-/TOP/BOT strand info in",
name.x, "\n")
cat(length(toerX), "SNPs nullified\n")
df$n.x[toerX] <- NA
df$beta.x[toerX] <- NA
}
gc()
toerX <- which(df$allele1.y == "A" & df$allele2.y == "T" &
(df$strand.y!="+" & df$strand.y!="-" &
df$strand.y!="TOP" & df$strand.y!="BOT"))
if (length(toerX) > 0) {
cat("AT coding without +/-/TOP/BOT strand info in",
name.y, "\n")
cat(length(toerX), "SNPs nullified\n")
df$n.y[toerX] <- NA
df$beta.y[toerX] <- NA
}
gc()
toerX <- which(df$allele1.y == "G" & df$allele2.y == "C" &
(df$strand.y!="+" & df$strand.y!="-" &
df$strand.y!="TOP" & df$strand.y!="BOT"))
if (length(toerX) > 0) {
cat("GC coding without +/-/TOP/BOT strand info in",
name.y, "\n")
cat(length(toerX), "SNPs nullified\n")
df$n.y[toerX] <- NA
df$beta.y[toerX] <- NA
}
gc()
swcode <- which(df$effallele.x != df$effallele.y |
df$allele1.x != df$allele1.y |
df$allele2.x != df$allele2.y)
if (length(swcode) > 0) {
cat("Different coding in two populations\n")
cat(length(swcode), "SNPs removed\n")
tmp <- df[swcode, ]
write.csv(tmp, file="failedcode.csv", row.names=FALSE)
df <- df[!(c(1:(dim(df)[1])) %in% swcode), ]
}
gc()
swcode <- which(is.na(df$beta.x) & is.na(df$beta.y))
if (length(swcode)>0) {
cat("NA for betas in both populations\n")
cat(length(swcode), "SNPs removed\n")
df <- df[!(c(1:(dim(df)[1])) %in% swcode), ]
}
gc()
## OK
cat("analysing ... \n")
# if (pop==pops[2]) name.x <- pops[1] else name.x <- "POOLED"
metaout <- dometa(df, name.x=name.x, name.y=name.y,
precorrect=precorrect,
correct.pooled=correct.pooled,
pos.x, pos.y, chr.x, chr.y)
cat("... DONE\n")
gc()
metaout
}
dometa <- function(data, name.x, name.y, precorrect=FALSE,
correct.pooled=FALSE, pos.x, pos.y, chr.x, chr.y) {
# prop <- 0.98
chi2med <- qchisq(.5, 1)
b.x <- (data$beta.x)
b.y <- (data$beta.y)
se.x <- abs(data$sebeta.x)
se.y <- abs(data$sebeta.y)
if (name.x == "POOLED" & !correct.pooled) {
lam.x <- 1.0
} else {
lam.x <- median(b.x * b.x / (se.x * se.x), na.rm=TRUE) / chi2med
}
lam.y <- median(b.y * b.y / (se.y * se.y), na.rm=TRUE) / chi2med
if (name.x != "POOLED")
cat("Lambda", name.x, "=", lam.x, "\n")
else if (correct.pooled)
cat("Lambda POOLED previous =", lam.x, "\n")
cat("Lambda", name.y, "=", lam.y, "\n")
if (lam.x < 1) {
warning(paste("Lambda", name.x, "< 1; constrained to 1"),
immediate.=TRUE); lam.x <- 1
}
if (lam.y < 1) {
warning(paste("Lambda", name.y, "< 1; constrained to 1"),
immediate.=TRUE); lam.y <- 1
}
if (precorrect & name.x!="POOLED") {
se.x <- sqrt(se.x*se.x*lam.x)
}
if (precorrect) {
se.y <- sqrt(se.y*se.y*lam.y)
}
lam.x <- median(b.x * b.x / (se.x * se.x), na.rm=TRUE)/chi2med
lam.y <- median(b.y * b.y / (se.y * se.y), na.rm=TRUE)/chi2med
if (name.x != "POOLED")
cat("Corrected Lambda", name.x, "=", lam.x, "\n")
else if (correct.pooled)
cat("Corrected Lambda POOLED previous =", lam.x, "\n")
cat("Corrected Lambda", name.y, "=", lam.y, "\n")
w2.x <- 1./(se.x * se.x)
w2.y <- 1./(se.y * se.y)
invsumw2 <- 1./(w2.x + w2.y)
mbeta <- (b.x*w2.x + b.y*w2.y)*invsumw2
mse <- sqrt(invsumw2)
mp <- pchisq(mbeta*mbeta/(mse*mse), 1, lower.tail=FALSE)
if (name.x != "POOLED") {
npops <- 1*(!is.na(data$beta.x)) + 1*(!is.na(data$beta.y))
} else {
npops <- data$npops + 1*(!is.na(data$beta.y))
if (any(is.na(npops))) {
npops[is.na(npops)] <-
1 * (!is.na(data$beta.x[is.na(npops)])) +
1 * (!is.na(data$beta.y[is.na(npops)]))
}
}
na.x <- which(is.na(b.x) & !is.na(b.y))
na.y <- which(is.na(b.y) & !is.na(b.x))
# na.xy <- which(is.na(b.y) & is.na(b.x))
# if (length(na.xy)>0) stop("Number of subjects missing in both data sets...")
out <- data.frame(name=data$name, stringsAsFactors=FALSE)
out$strand <- data$strand.x
out$allele1 <- data$allele1.x
out$allele2 <- data$allele2.x
out$effallele <- data$effallele.x
if (chr.x & chr.y) {
out$chromosome <- data$chromosome.x
out$chromosome[na.x] <- data$chromosome.y[na.x]
} else if (chr.x) {
out$chromosome <- data$chromosome.x
} else if (chr.y) {
out$chromosome <- data$chromosome.y
}
# print(c(pos.x, pos.y))
if (pos.x & pos.y) {
out$position <- data$position.x
out$position[na.x] <- data$position.y[na.x]
} else if (pos.x | pos.y) {
out$position <- data$position
}
out$strand[na.x] <- data$strand.y[na.x]
out$allele1[na.x] <- data$allele1.y[na.x]
out$allele2[na.x] <- data$allele2.y[na.x]
out$effallele[na.x] <- data$effallele.y[na.x]
out$n <- data$n.x + data$n.y
out$n[na.x] <- data$n.y[na.x]
out$n[na.y] <- data$n.x[na.y]
out$npops <- npops
out$beta <- mbeta
out$sebeta <- mse
out$beta[na.x] <- b.y[na.x]
out$sebeta[na.x] <- se.y[na.x]
out$beta[na.y] <- b.x[na.y]
out$sebeta[na.y] <- se.x[na.y]
na.x <- which(is.na(data$effallelefreq.x))
na.y <- which(is.na(data$effallelefreq.y))
nNAb <- which(!is.na(data$effallelefreq.x) &
!is.na(data$effallelefreq.y))
out$effallelefreq <- rep(NA, dim(data)[1])
out$effallelefreq[nNAb] <- (data$effallelefreq.x[nNAb] *
data$n.x[nNAb] +
data$effallelefreq.y[nNAb] *
data$n.y[nNAb]) / (data$n.x[nNAb]
+
data$n.y[nNAb])
out$effallelefreq[na.x] <- data$effallelefreq.y[na.x]
out$effallelefreq[na.y] <- data$effallelefreq.x[na.y]
# print(data$effallelefreq.x)
# print(data$effallelefreq.y)
# print(out$effallelefreq)
na.x <- which(is.na(data$call.x))
na.y <- which(is.na(data$call.y))
nNAb <- which(!is.na(data$call.x) & !is.na(data$call.y))
out$call <- rep(NA, dim(data)[1])
# print(na.x)
# print(nNAb)
out$call[nNAb] <- (data$call.x[nNAb] * data$n.x[nNAb] +
data$call.y[nNAb] * data$n.y[nNAb]) /
(data$n.x[nNAb] + data$n.y[nNAb])
out$call[na.x] <- data$call.y[na.x]
out$call[na.y] <- data$call.x[na.y]
# print(data$pexhwe.y)
if (all(data$pexhwe.x <= 1.0, na.rm=TRUE)) {
data$pexhwe.x <- qchisq(1. - data$pexhwe.x, 1)
}
if (all(data$pexhwe.y <= 1.0, na.rm=TRUE)) {
data$pexhwe.y <- qchisq(1. - data$pexhwe.y, 1)
}
# print(data$pexhwe.y)
na.x <- which(is.na(data$pexhwe.x))
na.y <- which(is.na(data$pexhwe.y))
nNAb <- which(!is.na(data$pexhwe.x) & !is.na(data$pexhwe.y))
out$pexhwe <- rep(NA, dim(data)[1])
out$pexhwe[nNAb] <- data$pexhwe.x[nNAb]+data$pexhwe.y[nNAb]
out$pexhwe[na.x] <- data$pexhwe.y[na.x]
out$pexhwe[na.y] <- data$pexhwe.x[na.y]
s <- match(substr(names(data), 1, 5), "obeta")
if (any(!is.na(s))) s <- which(!is.na(s))
if (any(!is.na(s))) out <- cbind(out, data[, names(data)[s]])
if (name.x != "POOLED") out[, paste("obeta", name.x,sep="")] <- b.x
out[, paste("obeta", name.y,sep="")] <- b.y
s <- match(substr(names(data), 1, 3), "ose")
if (any(!is.na(s))) s <- which(!is.na(s))
if (any(!is.na(s))) out <- cbind(out, data[, names(data)[s]])
if (name.x != "POOLED") out[, paste("ose", name.x,sep="")] <- se.x
out[, paste("ose", name.y,sep="")] <- se.y
cat("Lambda POOLED data =", median((out$beta / out$sebeta)^2,
na.rm=TRUE) / chi2med, "\n")
out$chi2 <- out$beta * out$beta / (out$sebeta * out$sebeta)
out$p <- pchisq(out$chi2, 1, lower.tail=FALSE)
out
}
swap.y <- function(df, swcode) {
df$beta.y[swcode] <- (-1.) * df$beta.y[swcode]
df$effallelefreq.y[swcode] <- (1. - df$effallelefreq.y[swcode])
a1.save <- df$allele1.y[swcode]
a2.save <- df$allele2.y[swcode]
df$effallele.y[swcode] <- a1.save
df$allele1.y[swcode] <- a2.save
df$allele2.y[swcode] <- a1.save
df
}
swap.x <- function(df, swcode) {
df$beta.x[swcode] <- (-1.) * df$beta.x[swcode]
df$effallelefreq.x[swcode] <- (1. - df$effallelefreq.x[swcode])
a1.save <- df$allele1.x[swcode]
a2.save <- df$allele2.x[swcode]
df$effallele.x[swcode] <- a1.save
df$allele1.x[swcode] <- a2.save
df$allele2.x[swcode] <- a1.save
df
}
recode.y <- function(df, swcode) {
a2t <- swcode[df$effallele.y[swcode]=="A"]
t2a <- swcode[df$effallele.y[swcode]=="T"]
g2c <- swcode[df$effallele.y[swcode]=="G"]
c2g <- swcode[df$effallele.y[swcode]=="C"]
df$effallele.y[a2t] <-"T"
df$effallele.y[t2a] <-"A"
df$effallele.y[g2c] <-"C"
df$effallele.y[c2g] <-"G"
a2t <- swcode[df$allele1.y[swcode]=="A"]
t2a <- swcode[df$allele1.y[swcode]=="T"]
g2c <- swcode[df$allele1.y[swcode]=="G"]
c2g <- swcode[df$allele1.y[swcode]=="C"]
df$allele1.y[a2t] <-"T"
df$allele1.y[t2a] <-"A"
df$allele1.y[g2c] <-"C"
df$allele1.y[c2g] <-"G"
a2t <- swcode[df$allele2.y[swcode]=="A"]
t2a <- swcode[df$allele2.y[swcode]=="T"]
g2c <- swcode[df$allele2.y[swcode]=="G"]
c2g <- swcode[df$allele2.y[swcode]=="C"]
df$allele2.y[a2t] <-"T"
df$allele2.y[t2a] <-"A"
df$allele2.y[g2c] <-"C"
df$allele2.y[c2g] <-"G"
f2r <- swcode[df$strand.y[swcode]=="+"]
r2f <- swcode[df$strand.y[swcode]=="-"]
t2b <- swcode[df$strand.y[swcode]=="TOP"]
b2t <- swcode[df$strand.y[swcode]=="BOT"]
df$strand.y[f2r] <- "-"
df$strand.y[r2f] <- "+"
df$strand.y[t2b] <- "BOT"
df$strand.y[b2t] <- "TOP"
if (any(is.na(df$strand.y[swcode]))) {
warning("NAs in strand!")
}
if (any(df$strand.y[swcode]!="+" &
df$strand.y[swcode]!="-" &
df$strand.y[swcode]!="TOP" &
df$strand.y[swcode]!="BOT")) {
warning("Unrecognised coding in strand!")
}
df
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.