Nothing
minP.test <- function(Y, G1, G2){
Y.arg <- deparse(substitute(Y))
G1.arg <- deparse(substitute(G1))
G2.arg <- deparse(substitute(G2))
# Checking if gene splitting is needed
if (ncol(G1) * ncol(G2) > 1000){
if (ncol(G1) > 30){
## Searching for clusters in G1
distance.G1 <- snpStats::ld(G1, G1, stats='R.squared')
distance.G1 <- as.dist(1 - distance.G1)
clust.tree.G1 <- chclust(distance.G1)
k1 <- cutree(clust.tree.G1, k=seq_len((ncol(G1)-30)))
#max.G1 <- sapply(seq_len(ncol(G1)-30),FUN=function(i){return(max(table(as.factor(k1[,i]))))})
max.G1 <- vapply(seq_len(ncol(G1)-30),FUN=function(i){return(max(table(as.factor(k1[,i]))))},FUN.VALUE=0)
# max.G1 <- max.G1[c(which(max.G1 > 20),which(max.G1 < 30)[1])]
id.max.G1 <- which(max.G1 <= 30)[1]
} else{
k1 <- matrix(rep(1,ncol(G1)),ncol=1)
row.names(k1) <- colnames(G1)
max.G1 <- ncol(G1)
id.max.G1 <- 1
}
## Searching for clusters in G1
if (ncol(G2) >30){
distance.G2 <- snpStats::ld(G2, G2, stats='R.squared')
distance.G2 <- as.dist(1 - distance.G2)
clust.tree.G2 <- chclust(distance.G2)
k2 <- cutree(clust.tree.G2, k=seq_len(ncol(G2)-30))
# max.G2 <- sapply(seq_len(ncol(G2)-30),FUN=function(i){return(max(table(as.factor(k2[,i]))))})
max.G2 <- vapply(seq_len(ncol(G2)-30),FUN=function(i){return(max(table(as.factor(k2[,i]))))},FUN.VALUE=0)
# max.G2 <- max.G2[c(which(max.G2 > 20),which(max.G2 < 30)[1])]
id.max.G2 <- which(max.G2 <= 30)[1]
} else {
k2 <- matrix(rep(1,ncol(G2)),ncol=1)
row.names(k2) <- colnames(G2)
max.G2 <- ncol(G2)
id.max.G2 <- 1
}
## Detection of the subset of SNPs that cut each gene in sufficiently small pieces
#mult <- outer(max.G1[ind.max.G1],max.G2[ind.max.G2],"*")
#tmp.mult <- (mult < 900)*mult
division <- c(id.max.G1,id.max.G2)
# division <- which(tmp.mult==max(tmp.mult),arr.ind=TRUE)[1,]
# division <- which(mult < 900,arr.ind=TRUE)[1,]
# boundaries.start.G1 <- c(1,1+as.numeric(which(sapply(seq_len(length(k1[,division[1]])-1),FUN=function(i){return(k1[,division[1]][i]!=k1[,division[1]][i+1])}))))
# boundaries.end.G1 <- c(as.numeric(which(sapply(seq_len(length(k1[,division[1]])-1),FUN=function(i){return(k1[,division[1]][i]!=k1[,division[1]][i+1])}))),ncol(G1))
boundaries.start.G1 <- c(1,1+as.numeric(which(vapply(seq_len(length(k1[,division[1]])-1),FUN=function(i){return(k1[,division[1]][i]!=k1[,division[1]][i+1])},FUN.VALUE=TRUE))))
boundaries.end.G1 <- c(as.numeric(which(vapply(seq_len(length(k1[,division[1]])-1),FUN=function(i){return(k1[,division[1]][i]!=k1[,division[1]][i+1])},FUN.VALUE=TRUE))),ncol(G1))
# boundaries.start.G2 <- c(1,1+as.numeric(which(sapply(seq_len(length(k2[,division[2]])-1),FUN=function(i){return(k2[,division[2]][i]!=k2[,division[2]][i+1])}))))
# boundaries.end.G2 <- c(as.numeric(which(sapply(seq_len(length(k2[,division[2]])-1),FUN=function(i){return(k2[,division[2]][i]!=k2[,division[2]][i+1])}))),ncol(G2))
boundaries.start.G2 <- c(1,1+as.numeric(which(vapply(seq_len(length(k2[,division[2]])-1),FUN=function(i){return(k2[,division[2]][i]!=k2[,division[2]][i+1])},FUN.VALUE=TRUE))))
boundaries.end.G2 <- c(as.numeric(which(vapply(seq_len(length(k2[,division[2]])-1),FUN=function(i){return(k2[,division[2]][i]!=k2[,division[2]][i+1])},FUN.VALUE=TRUE))),ncol(G2))
} else{
boundaries.start.G1 <- c(1)
boundaries.start.G2 <- c(1)
boundaries.end.G1 <- c(ncol(G1))
boundaries.end.G2 <- c(ncol(G2))
}
sub.pairs.start <- expand.grid(boundaries.start.G1,boundaries.start.G2)
sub.pairs.end <- expand.grid(boundaries.end.G1,boundaries.end.G2)
sub.pairs <- data.frame(
start.G1=sub.pairs.start[,1],
end.G1=sub.pairs.end[,1],
start.G2=sub.pairs.start[,2],
end.G2=sub.pairs.end[,2]
)
# All pairs are iterated over
pairs.p.val <- rep(NA,times=nrow(sub.pairs))
pairs.stat <- rep(NA,times=nrow(sub.pairs))
for (i in seq_len(nrow(sub.pairs))){
# Cluster boundaries for both genes
G1.boundary <- (sub.pairs$start.G1[i]):(sub.pairs$end.G1[i])
G2.boundary <- (sub.pairs$start.G2[i]):(sub.pairs$end.G2[i])
# c.test <- list(p.value=runif(1),statistic=runif(1))
c.test <-minP.test.2pairs(Y, G1[, G1.boundary], G2[, G2.boundary])
pairs.p.val[i] <- c.test$p.value
pairs.stat[i] <- c.test$statistic
}
tmp <- p.adjust(pairs.p.val, "BH")
pval <- min(tmp)[1]
stat <- pairs.stat[which.min(tmp)]
names(stat)="Wmax"
# res <- list(statistic=stat,p.value=pval,method="minP")
# class(res) <- "GGItest"
# return(res)
null.value <- NULL
# names(null.value) <- "Wmax"
estimate <- c(stat)
names(estimate) <- c("Wmax")
parameters <- NULL
# names(parameters) <- ""
res <- list(
null.value=null.value,
alternative="greater",
method="Gene-based interaction based on minP method",
estimate= estimate,
data.name=paste(Y.arg," and (",G1.arg," , ",G2.arg,")",sep=""),
statistic=stat,
p.value=pval,
parameters=parameters)
class(res) <- "htest"
return(res)
}
minP.test.2pairs <- function(Y, G1, G2){
Y.arg <- deparse(substitute(Y))
G1.arg <- deparse(substitute(G1))
G2.arg <- deparse(substitute(G2))
if (!is.null(dim(Y))) {
Y <- Y[, 1]
}
if (nlevels(as.factor(Y)) != 2) {
stop("response variable should be binary. (2 modes).")
} else if (!is(G1,"SnpMatrix") | !is(G2,"SnpMatrix")) {
stop("G1 and G2 arguments should be SnpMatrix objects.")
} else if (nrow(G1) != nrow(G2)) {
stop("G1 and G2 should have same rows count.")
} else if (length(Y) != nrow(G1) | length(Y) != nrow(G2)) {
stop("Response variable should be conformant with genes matrices rows number.")
} else if (sum(is.na(G1))!=0) {
stop("The snpMatrix must be complete. No NAs are allowed.")
} else if (sum(is.na(G2))!=0) {
stop("The snpMatrix must be complete. No NAs are allowed.")
} else if (sum(is.na(Y)) != 0) {
stop("The response variable vector must be complete. No NAs are allowed.")
}
Y <- as.numeric(Y)
if(min(Y)!=0){Y<-Y-min(Y)}
Y <- as.factor(Y)
# minP test is set up
# SNP interactions are computed
SSI.res <- SSI.test(Y, G1, G2)
## Computation of the correlation matrix
MatCor1 <- snpStats::ld(G1, G1, stats="R")
MatCor2 <- snpStats::ld(G2, G2, stats="R")
n1 <- ncol(MatCor1)
n2 <- ncol(MatCor2)
n.pairs <- n1*n2
sigma.matrix <- matrix(NA,ncol=n.pairs,nrow=n.pairs)
for (i in seq_len(n.pairs-1)){
i1 <- floor((i-1)/n2)+1
j1 <- i-(i1-1)*n2
for (j in (i+1):n.pairs){
i2 <- floor((j-1)/n2)+1
j2 <- j-(i2-1)*n2
sigma.matrix[i,j] <- sigma.matrix[j,i] <- MatCor1[i1,i2]*MatCor2[j1,j2]
}
}
diag(sigma.matrix) <- 1
# minP test is computed past this point
SSI.min <- min(SSI.res, na.rm=TRUE)
# Case in which 1 - SSI.min/2 == 1 (qnorm(1) = -Inf)
if (SSI.min == 0) {
GG.Pmin <- 0
# Case in which 1 - SSI.min/2 == 1 (qnorm(1) = Inf)
} else {
lower <- rep(qnorm(SSI.min / 2), ncol(sigma.matrix))
upper <- rep(qnorm(1 - SSI.min / 2), ncol(sigma.matrix))
GG.Pmin <- 1 - mvtnorm::pmvnorm(lower=lower,upper=upper,sigma=sigma.matrix,maxpts=2.5E6,abseps = 0.01
#abseps=1E-13
)
}
if (attributes(GG.Pmin)$msg=="Completion with error > abseps"){
warning(paste("p-values are approximated with error=",attributes(GG.Pmin)$error))
}
pval <- as.numeric(GG.Pmin)
stat <- SSI.min
names(stat)="Wmax"
#res <- list(statistic=stat,p.value=pval,method="minP")
#class(res) <- "GGItest"
# return(res)
null.value <- 0
names(null.value) <- "Wmax"
estimate <- c(stat)
names(estimate) <- c("Wmax")
parameters <- NULL
# names(parameters) <- ""
res <- list(
null.value=null.value,
alternative="greater",
method="Gene-based interaction based on minP method",
estimate= estimate,
data.name=paste("Interaction between",G1.arg,"and",G2.arg,"in association with",Y.arg),
statistic=stat,
p.value=pval,
parameters=parameters)
class(res) <- "htest"
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.