#' @importFrom stats as.formula cor.test model.matrix residuals
#' @importFrom coxme lmekin ranef
#' @importFrom stats sd
#' @importFrom MASS ginv
getFamilyBasedPCorResults <- function(Xind, Yind, Sinds, phen.df, covs.df, pedigrees, Phi2, Z,
minK=10, maxFC = 0.01, orthogonal=TRUE, fileID=NULL, dirToSave=NULL,
useGPU=FALSE, debug=TRUE, logFile="./log.txt") {
# pedigrees must be sampled
#######################
# Computing rho_X,Y|S #
#######################
N <- dim(phen.df)[1]
phen.names <- colnames(phen.df)
Xname <- phen.names[Xind]
Yname <- phen.names[Yind]
Snames <- phen.names[Sinds]
if (!is.null(covs.df)) {
Snames <- c(Snames, colnames(covs.df))
phen.df <- cbind(phen.df, covs.df)
}
if (debug) {
cat(paste0("Computing rho_", Xname, ",", Yname, "|{", paste0(Snames, collapse=","), "}"),
file=logFile, append=TRUE, sep="\n")
}
##################################
# Checking previous computations #
##################################
results_g_filepath <- ""
if (!is.null(fileID) && !is.null(dirToSave)) {
if (Xind < Yind) {
results_g_filepath <- paste0(dirToSave, fileID, "_results_g_", Xind, "_", Yind, "_", paste(Sinds, collapse=""), ".rds")
} else {
results_g_filepath <- paste0(dirToSave, fileID, "_results_g_", Yind, "_", Xind, "_", paste(Sinds, collapse=""), ".rds")
}
}
results_g_done = FALSE
if (results_g_filepath != "" && file.exists(results_g_filepath)) {
results_g <- readRDS(file=results_g_filepath) # loading results_g
ret = checkUnconfoundedEstimatesList(results_g, maxFC, N)
if (ret == "done") { # results are complete
results_g_done = TRUE
} else if (ret == "incomplete") {
if (debug) {
cat(paste0("Updating results_g for ", fileID, "_", Xind, "_", Yind, "_", paste(Sinds, collapse=""), "\n"),
file=logFile, append=TRUE, sep="\n")
}
} else if (ret == "error") {
file.remove(results_g_filepath)
if (debug) {
cat(paste0("Redoing results_g for ", fileID, "_", Xind, "_", Yind, "_", paste(Sinds, collapse=""), "\n"),
file=logFile, append=TRUE, sep="\n")
}
}
}
results_e_filepath <- ""
if (!is.null(fileID) && !is.null(dirToSave)) {
if (Xind < Yind) {
results_e_filepath <- paste0(dirToSave, fileID, "_results_e_", Xind, "_", Yind, "_", paste(Sinds, collapse=""), ".rds")
} else {
results_e_filepath <- paste0(dirToSave, fileID, "_results_e_", Yind, "_", Xind, "_", paste(Sinds, collapse=""), ".rds")
}
}
results_e_done = FALSE
if (results_e_filepath != "" && file.exists(results_e_filepath)) {
results_e <- readRDS(file=results_e_filepath) # loading results_e
ret = checkUnconfoundedEstimatesList(results_e, maxFC, N)
if (ret == "done") { # results are complete
results_e_done = TRUE
} else if (ret == "incomplete") {
if (debug) {
cat(paste0("Updating results_e for ", fileID, "_", Xind, "_", Yind, "_", paste(Sinds, collapse=""), "\n"),
file=logFile, append=TRUE, sep="\n")
}
} else if (ret == "error") {
file.remove(results_e_filepath)
if (debug) {
cat(paste0("Redoing results_e for ", fileID, "_", Xind, "_", Yind, "_", paste(Sinds, collapse=""), "\n"),
file=logFile, append=TRUE, sep="\n")
}
}
}
###################
# Model for X | S #
###################
if (debug) {
cat(paste0("Fitting ", Xname, " to ", "|{", paste0(Snames, collapse=","), "}"),
file=logFile, append=TRUE, sep="\n")
}
formulaXS <- as.formula(paste(Xname, " ~ 1 + (1 | pedigrees$id)"))
if (length(Sinds) > 0)
formulaXS <- as.formula(paste(Xname, "~ 1 + ", paste(paste(Snames, "+ "),collapse=""), " (1 | pedigrees$id)"))
fitFilePath <- ""
if (!is.null(fileID) && !is.null(dirToSave)) {
fitFilePath <- paste0(dirToSave, fileID, "_fit_", Xind, "_", paste(Sinds, collapse=""), ".rds")
}
if (file.exists(fitFilePath)) {
curfit <- readRDS(fitFilePath)
} else {
curfit = lmekin(formulaXS, data=phen.df, varlist=Phi2, method="REML")
if (fitFilePath != "") {
saveRDS(curfit, file=fitFilePath) # we are always saving an object named curfit
}
}
fitXS <- curfit
sigma2XS_g <- as.numeric(fitXS$vcoef)
if (sigma2XS_g < 1e-06) {
sigma2XS_g = 0
}
sigma2XS_e <- as.numeric(fitXS$sigma)^2
if (sigma2XS_e < 1e-06) {
sigma2XS_e = 0
}
sigma2XS <- (sigma2XS_g + sigma2XS_e)
#print(paste0("sigma2g_(", Xname, "| {", paste0(Snames, collapse=","), "}) = ", sigma2XS_g))
#print(paste0("sigma2e_(", Xname, "| {", paste0(Snames, collapse=","), "}) = ", sigma2XS_e))
if (!results_g_done || !results_e_done) {
if (sigma2XS_g != 0 || sigma2XS_e != 0) {
DXS <- diag(N) * sigma2XS_g # It is 0 if sigma2g=0
RXS <- diag(N) * sigma2XS_e # It is 0 if sigma2e=0
MXSinv <- (Z %*% DXS %*% t(Z) + RXS)
MXS <- ginv(MXSinv)
#V <- Minv * sigma2
Sformula <- paste(paste(Snames, " + "), collapse="")
Sformula <- as.formula(paste("~ ", Sformula, " 1"))
S <- model.matrix(Sformula, data=phen.df)
QXS <- MXS - MXS %*% S %*% ginv(t(S) %*% MXS %*% S) %*% t(S) %*% MXS
temp = Z %*% DXS %*% t(Z) %*% QXS
BgXS = temp %*% Z %*% DXS %*% t(Z) %*% t(temp) # B-A - corresponding to Zb - It is 0 if sigma2g=0
AgXS = temp %*% RXS %*% t(temp) #- corresponding to e - It is 0 if sigma2e=0.
BgXS = BgXS + AgXS # To be maximized in the same time that Ag is minimized
temp = RXS %*% QXS
BeXS = temp %*% RXS %*% t(temp) # B-A - corresponding to e - It is 0 if sigma2e=0
AeXS = temp %*% Z %*% DXS %*% t(Z) %*% t(temp) # corresponding to Zb = It is 0 if sigma2g=0
BeXS = BeXS + AeXS
ranefXS <- Z %*% DXS %*% t(Z) %*% QXS %*% phen.df[,Xname]
#head(ranef(fitXS)[[1]])
#head(ranefXS)
resXS <- RXS %*% QXS %*% phen.df[,Xname]
#head(residuals(fitXS))
#head(resXS)
margerrXS <- MXSinv %*% QXS %*% phen.df[,Xname]
#head(resXS + ranefXS)
#head(margerrXS)
}
}
##################
# Model for Y| S #
##################
if (debug) {
cat(paste0("Fitting ", Yname, " to ", "|{", paste0(Snames, collapse=","), "}"),
file=logFile, append=TRUE, sep="\n")
}
formulaYS <- as.formula(paste(Yname, " ~ 1 + (1 | pedigrees$id)"))
if (length(Sinds) > 0)
formulaYS <- as.formula(paste(Yname, "~ 1 + ", paste(paste(Snames, "+ "),collapse=""), " (1 | pedigrees$id)"))
fitFilePath <- ""
if (!is.null(fileID) && !is.null(dirToSave)) {
fitFilePath <- paste0(dirToSave, fileID, "_fit_", Yind, "_", paste(Sinds, collapse=""), ".rds")
}
if (file.exists(fitFilePath)) {
curfit <- readRDS(fitFilePath)
} else {
curfit = lmekin(formulaYS, data=phen.df, varlist=Phi2, method="REML")
if (fitFilePath != "") {
saveRDS(curfit, file=fitFilePath) # we are always saving an object named curfit
}
}
fitYS <- curfit
sigma2YS_g <- as.numeric(fitYS$vcoef)
if (sigma2YS_g < 1e-06) {
sigma2YS_g = 0
}
sigma2YS_e <- as.numeric(fitYS$sigma)^2
if (sigma2YS_e < 1e-06) {
sigma2YS_e = 0
}
sigma2YS <- (sigma2YS_g + sigma2YS_e)
if (!results_g_done || !results_e_done) {
if (sigma2YS_g != 0 || sigma2YS_e != 0) {
DYS <- diag(N) * sigma2YS_g
RYS <- diag(N) * sigma2YS_e
MYSinv <- (Z %*% DYS %*% t(Z) + RYS)
MYS <- ginv(MYSinv)
Sformula <- paste(paste(Snames, " + "), collapse="")
Sformula <- as.formula(paste("~ ", Sformula, " 1"))
S <- model.matrix(Sformula, data=phen.df)
QYS <- MYS - MYS %*% S %*% ginv(t(S) %*% MYS %*% S) %*% t(S) %*% MYS
temp = Z %*% DYS %*% t(Z) %*% QYS
BgYS = temp %*% Z %*% DYS %*% t(Z) %*% t(temp) # B-A - corresponding to Zb
AgYS = temp %*% RYS %*% t(temp) #- corresponding to e
BgYS = BgYS + AgYS # To be maximized in the same time that Ag is minimized
temp = RYS %*% QYS
BeYS = temp %*% RYS %*% t(temp) # B-A - corresponding to e
AeYS = temp %*% Z %*% DYS %*% t(Z) %*% t(temp) # corresponding to Zb
BeYS = BeYS + AeYS
ranefYS <- Z %*% DYS %*% t(Z) %*% QYS %*% phen.df[,Yname]
#head(ranef(fitYS)[[1]])
#head(ranefYS)
resYS <- RYS %*% QYS %*% phen.df[,Yname]
#head(residuals(fitYS))
#head(resYS)
margerrYS <- MYSinv %*% QYS %*% phen.df[,Yname]
#head(resYS + ranefYS)
#head(margerrYS)
}
}
margerrXS = ranef(fitXS)[[1]] + residuals(fitXS)
margerrYS = ranef(fitYS)[[1]] + residuals(fitYS)
rho_t = cor.test(margerrXS, margerrYS)
rho_t
#############
# rho_X,Y|S #
#############
if (!is.null(fileID) && debug) {
cat(paste0("Processing results_g for ", fileID, "_", Xind, "_", Yind, "_", paste(Sinds, collapse=""), "\n"),
file=logFile, append=TRUE, sep="\n")
}
if (!results_g_done) {
if (sigma2XS_g == 0 || sigma2YS_g == 0) {
# It is not possible to compute rho_g, since some standard deviantion is zero
results_g <- list()
results_g[[1]] <- list(k=N, p.value=1, estimate=0, sdx=0, sdy=0, FCxs=NaN, FCys=NaN)
} else {
# Since both sigma2XS_g and sigma2YS_g are not zero, all variables above were defined
if (sigma2XS_e == 0 && sigma2YS_e == 0) {
# If both sigma2XS_e and sigma2YS_e are zero, then there is no confounding in g
rho_g = cor.test(ranefXS, ranefYS)
results_g <- list()
results_g[[1]] <- list(k=N, p.value=rho_g$p.value, estimate=rho_g$estimate, sdx=sd(ranefXS), sdy=sd(ranefYS), FCxs=0, FCys=0)
} else {
preComputedResults <- NULL
if (results_g_filepath != "" && file.exists(results_g_filepath)) {
results_g <- readRDS(file=results_g_filepath) # loading existing results_g
preComputedResults <- results_g
}
if (debug) {
cat(paste0("Getting unconfounded estimates for ", Xname, ",", Yname, "|{", paste0(Snames, collapse=","), "}_g"),
file=logFile, append=TRUE, sep="\n")
}
results_g <- getUnconfoundedEstimates(BgXS, BgYS, AgXS, AgYS, maxFC, minK, orthogonal,
ranefXS, ranefYS, preComputedResults, useGPU,
debug=debug, logFile=logFile)
if (length(results_g) == 0) {
rho_g = cor.test(ranefXS, ranefYS)
results_g[[1]] <- list(k=N, p.value=rho_g$p.value, estimate=rho_g$estimate, sdx=sd(ranefXS), sdy=sd(ranefYS), FCxs=0, FCys=0)
}
}
}
# saving results_g
if (results_g_filepath != "" ) {
if (debug) {
cat(paste0("Saving results_g for ", fileID, "_", Xind, "_", Yind, "_", paste(Sinds, collapse=""), "\n"),
file=logFile, append=TRUE, sep="\n")
}
saveRDS(results_g, file=results_g_filepath)
}
}
if (!is.null(fileID) && debug) {
cat(paste0("Processing results_e for ", fileID, "_", Xind, "_", Yind, "_", paste(Sinds, collapse=""), "\n"),
file=logFile, append=TRUE, sep="\n")
}
if (!results_e_done) {
if (sigma2XS_e == 0 || sigma2YS_e == 0) {
# It is not possible to compute rho_e, since some standard deviantion is zero
results_e <- list()
results_e[[1]] <- list(k=N, p.value=1, estimate=0, sdx=0, sdy=0, FCxs=NaN, FCys=NaN)
} else {
# Since both sigma2XS_e and sigma2YS_e are not zero, all variables above were defined
if (sigma2XS_g == 0 && sigma2YS_g == 0) {
# If both sigma2XS_g and sigma2YS_g are zero, then there is no confounding in e
rho_e = cor.test(resXS, resYS)
results_e <- list()
results_e[[1]] <- list(k=N, p.value=rho_e$p.value, estimate=rho_e$estimate,
sdx=sd(resXS), sdy=sd(resYS), FCxs=0, FCys=0)
} else {
preComputedResults <- NULL
if (results_e_filepath != "" && file.exists(results_e_filepath)) {
results_e <- readRDS(file=results_e_filepath) # loading existing results_e
preComputedResults <- results_e
}
if (debug) {
cat(paste0("Getting unconfounded estimates for ", Xname, ",", Yname, "|{", paste0(Snames, collapse=","), "}_e"),
file=logFile, append=TRUE, sep="\n")
}
results_e <- getUnconfoundedEstimates(BeXS, BeYS, AeXS, AeYS, maxFC, minK,
orthogonal, resXS, resYS, preComputedResults, useGPU,
debug=debug, logFile=logFile)
if (length(results_e) == 0) {
rho_e = cor.test(resXS, resYS)
results_e[[1]] <- list(k=N, p.value=rho_e$p.value, estimate=rho_e$estimate,
sdx=sd(resXS), sdy=sd(resYS), FCxs=0, FCys=0)
}
}
}
# saving results_e
if (results_e_filepath != "" ) {
saveRDS(results_e, file=results_e_filepath)
if (debug) {
cat(paste0("Saving results_e for ", fileID, "_", Xind, "_", Yind, "_", paste(Sinds, collapse=""), "\n"),
file=logFile, append=TRUE, sep="\n")
}
}
}
return(list(results_g=results_g, results_e=results_e,
sigma2XS_e=sigma2XS_e, sigma2YS_e=sigma2YS_e,
sigma2XS_g=sigma2XS_g, sigma2YS_g=sigma2YS_g,
k_t=N, rho_t=rho_t$estimate, p.value_t=rho_t$p.value,
sdx_t=sd(margerrXS), sdy_t=sd(margerrYS),
Xind=Xind, Yind=Yind, Sinds=Sinds))
}
checkUnconfoundedEstimatesList <- function(results, maxFC, N) {
redoList <- c()
for (i in 1:length(results)) {
curResultsNames <- names(results[[i]])
if (is.null(curResultsNames) || length(curResultsNames) != 7 ||
sum(curResultsNames == c("k","p.value","estimate","sdx","sdy","FCxs","FCys")) != 7) {
redoList <- c(redoList, i)
}
}
if (length(redoList) > 0) {
return("error")
}
FCysVec <- as.numeric(unlist(lapply(results, '[', "FCys")))
FCxsVec <- as.numeric(unlist(lapply(results, '[', "FCxs")))
kVec <- as.numeric(unlist(lapply(results, '[', "k")))
if ((length(results) == 1 && is.nan(FCysVec) && is.nan(FCxsVec)) ||
(length(FCysVec) == length(FCxsVec) && (max(FCysVec, na.rm=T) >= maxFC || max(FCxsVec, na.rm=T) >= maxFC)) ||
(length(FCysVec) == length(FCxsVec) && max(kVec) == N)) {
return("done")
} else {
return("incomplete")
}
}
#' @importFrom stats sd
#' @importFrom R.utils withTimeout
getUnconfoundedEstimates <- function(Bxs, Bys, Axs, Ays, maxFC, minK, orthogonal, rx, ry,
preComputedResults=NULL, useGPU=FALSE, gpuCores=5,
debug=TRUE, logFile="./log.txt") {
n = dim(Bxs)[1]
k = minK
FCxs = FCys = 0
if (is.null(preComputedResults)) {
resultsList <- list()
} else {
resultsList <- preComputedResults
}
if (useGPU && requireNamespace("gpuR", quietly = TRUE)) {
if (debug) {
cat("\nUsing GPU in getUnconfoundedEstimates()..\n",
file=logFile, append=TRUE, sep="\n")
}
curK <- minK
kList <- c()
while(curK <= n) {
kList <- c(kList, curK)
curK <- round(curK + 0.1*curK)
}
funGPU <- function(k) {
gpuR::setContext(3L)
tryCatch({
W = mcrotate_orthogonal(Axs + Ays, Bxs + Bys, k, orthogonal, useGPU)
FCxs <- computeFC(W,Axs,Bxs,n,useGPU)
FCys <- computeFC(W,Ays,Bys,n,useGPU)
rot_rx = t(W)%*% rx
rot_ry = t(W)%*% ry
ctest = cor.test(rot_rx, rot_ry)
return(list(k=k, p.value=ctest$p.value, estimate=ctest$estimate, sdx=sd(rot_rx),
sdy=sd(rot_ry), FCxs=FCxs, FCys=FCys))
},
error = function(e) {
cat(paste0("Error: ", e),
file=logFile, append=TRUE, sep="\n")
return(list(e))
})
}
cl <- gpuR::makeCluster(gpuCores, outfile=logFile) # log can only be seen using outfile=""
gpuR::clusterExport(cl, c("funGPU", "mcrotate_orthogonal", "mymcrotate",
"myginv", "computeFC", "Bxs", "Bys", "Axs", "Ays",
"maxFC", "minK", "orthogonal", "rx", "ry", "useGPU", "n"), envir=environment())
gpuR::setContext(3L)
resultsList <- gpuR::parLapply(cl,kList,funGPU)
gpuR::stopCluster(cl)
} else {
nTries <- 0
i <- length(resultsList)+1
kVec <- as.numeric(unlist(lapply(resultsList, '[', "k")))
while (FCxs <= maxFC && FCys <= maxFC && k <= n && nTries <= 5) {
if (!(k %in% kVec)) {
retValue <- tryCatch({
W = mcrotate_orthogonal(Axs + Ays, Bxs + Bys, k, orthogonal, useGPU)
# W = withTimeout({mcrotate_orthogonal(Axs + Ays, Bxs + Bys, k, orthogonal)}, timeout=120)
if (debug) {
cat(paste0("k:", k, "\n"),
file=logFile, append=TRUE, sep="\n")
}
FCxs <- withTimeout({computeFC(W,Axs,Bxs,n,useGPU)}, timeout=60)
if (debug) {
cat(paste0("FCxs:", FCxs, "\n"),
file=logFile, append=TRUE, sep="\n")
}
FCys <- withTimeout({computeFC(W,Ays,Bys,n,useGPU)}, timeout=60)
if (debug) {
cat(paste0("FCys:", FCys, "\n"),
file=logFile, append=TRUE, sep="\n")
}
rot_rx = t(W)%*% rx
rot_ry = t(W)%*% ry
ctest = cor.test(rot_rx, rot_ry)
if (debug) {
cat(paste0("rho:", ctest$estimate, ". p-value:", ctest$p.value, "\n"),
file=logFile, append=TRUE, sep="\n")
}
resultsList[[i]] <- list(k=k, p.value=ctest$p.value, estimate=ctest$estimate, sdx=sd(rot_rx), sdy=sd(rot_ry), FCxs=FCxs, FCys=FCys)
i = i+1
0
},
error = function(e) {
if (debug) {
cat(paste0("Error for k=", k, ", after ", nTries, " tries: ", e),
file=logFile, append=TRUE, sep="\n")
}
return(-1)
})
if (retValue == -1) {
nTries <- nTries + 1
} else {
nTries <- 0
}
}
k = round(k + 0.1*k)
}
}
return(resultsList)
}
computeFC <- function(W, A, B, n, useGPU=FALSE) {
if (useGPU && requireNamespace("gpuR", quietly = TRUE)) {
A <- gpuR::gpuMatrix(A)
B <- gpuR::gpuMatrix(B)
W <- gpuR::gpuMatrix(W)
}
FC <- sum(as.vector(diag(myginv(as.matrix(t(W) %*% B %*% W), useGPU) %*% (t(W) %*% A %*% W))))/n
return(FC)
}
myginv <- function (X, useGPU = FALSE) {
tol = sqrt(.Machine$double.eps)
if (useGPU && requireNamespace("gpuR", quietly = TRUE)) {
X <- gpuR::gpuMatrix(X)
}
Xsvd <- svd(X)
decOrder <- order(as.vector(Xsvd$d), decreasing=TRUE)
Xsvd.d <- as.vector(Xsvd$d)[decOrder]
Xsvd.u <- as.matrix(Xsvd$u)[,decOrder]
Xsvd.v <- as.matrix(Xsvd$v)[,decOrder]
# X - Xsvd.u %*% diag(Xsvd.d) %*% t(Xsvd.v)
Positive <- Xsvd.d > max(tol * Xsvd.d[1L], 0)
ds <- diag(1/Xsvd.d[Positive])
us <- as.matrix(Xsvd.u[, Positive, drop=FALSE])
vs <- as.matrix(Xsvd.v[, Positive, drop=FALSE])
if (useGPU && requireNamespace("gpuR", quietly = TRUE)) {
ds <- gpuR::gpuMatrix(ds)
us <- gpuR::gpuMatrix(us)
vs <- gpuR::gpuMatrix(vs)
}
X.ginv <- as.matrix(vs %*% ds %*% t(us))
return(X.ginv)
}
#' @importFrom Matrix rankMatrix
mymcrotate <- function(A, B, s, useGPU = FALSE) {
r <- rankMatrix(B)
if(is.null(s)) s <- r
if (useGPU && requireNamespace("gpuR", quietly = TRUE)) {
A <- gpuR::gpuMatrix(A)
B <- gpuR::gpuMatrix(B)
}
B.svd <- svd(B)
decOrder <- order(as.vector(B.svd$d), decreasing=TRUE)
B.svd.d <- as.vector(B.svd$d)[decOrder]
B.svd.u <- as.matrix(B.svd$u)[,decOrder]
B.svd.v <- as.matrix(B.svd$v)[,decOrder]
svDiag <- diag(1/sqrt(B.svd.d[1:r]))
Tr <- B.svd.u[, 1:r]
if (useGPU && requireNamespace("gpuR", quietly = TRUE)) {
Tr <- gpuR::gpuMatrix(Tr)
svDiag <- gpuR::gpuMatrix(svDiag)
}
A.star <- svDiag %*% t(Tr) %*% A %*% Tr %*% svDiag
# Note that B.star == I:
# svDiag %*% t(Tr) %*% B %*% Tr %*% svDiag )
A.star.svd <- svd(A.star) #fast.svd(A.star)
decOrder <- order(as.vector(A.star.svd$d), decreasing=TRUE)
A.star.svd.d <- as.vector(A.star.svd$d)[decOrder]
A.star.svd.u <- as.matrix(A.star.svd$u)[,decOrder]
A.star.svd.v <- as.matrix(A.star.svd$v)[,decOrder]
index <- seq(r, length.out = s, by = -1)
index <- sort(index[index >= 0])
AstarU <- A.star.svd.u[,index]
if (useGPU && requireNamespace("gpuR", quietly = TRUE)) {
AstarU <- gpuR::gpuMatrix(AstarU)
}
W <- Tr %*% svDiag %*% AstarU
return(as.matrix(W))
}
mcrotate_orthogonal <- function (A, B, s, orthogonal, useGPU=FALSE) {
W <- mymcrotate(A, B, s, useGPU)
if(orthogonal) {
# NOTE: qr.gpuMatrix(W): non-square matrix not currently supported for 'qr'
# if (useGPU) {
# require(gpuR)
# W <- gpuMatrix(W)
# }
W.QR = qr(W)
W.Q = qr.Q(W.QR)
# W.R = qr.R(W.QR)
W = W.Q
}
return(as.matrix(W))
}
#' @importFrom utils combn
selectBestUDGPCorMatrices <- function(phen.p, alpha, maxFC, fileID, resultsDir,
savePlots=TRUE, variables=NULL, debug=TRUE, logFile="./log.txt") {
nei_pCor_g_estimates <- matrix(NA, phen.p, phen.p)
if (!is.null(variables) && length(variables) == phen.p) {
colnames(nei_pCor_g_estimates) <- variables
rownames(nei_pCor_g_estimates) <- variables
}
nei_pCor_g_pvalues <- nei_pCor_g_estimates
nei_pCor_g_ks <- nei_pCor_g_estimates
nei_pCor_e_estimates <- nei_pCor_g_estimates
nei_pCor_e_pvalues <- nei_pCor_g_estimates
nei_pCor_e_ks <- nei_pCor_g_estimates
nei_pCor_t_estimates <- nei_pCor_g_estimates
nei_pCor_t_pvalues <- nei_pCor_g_estimates
pseq <- 1:phen.p
xyinds <- combn(pseq,2)
for (xyind in 1:dim(xyinds)[2]) {
Xind <- xyinds[1,xyind]
Yind <- xyinds[2,xyind]
Sinds <- pseq[-c(Xind, Yind)]
resultFile <- paste0(resultsDir, fileID, "_", Xind, "_", Yind, "_", paste(Sinds, collapse=""), ".rds")
if (file.exists(resultFile)) {
if (debug) {
cat(paste0("Processing ", resultFile),
file=logFile, append=TRUE, sep="\n")
}
curResult <- readRDS(resultFile)
if (savePlots) {
saveEdgeResultsPlot(curResult, resultsDir, fileID, Xind, Yind, Sinds, alpha, maxFC=maxFC,
debug=debug, logFile=logFile)
}
resultsList_g <- curResult$results_g
resultsList_e <- curResult$results_e
if (debug) {
cat(paste0("Selecting indices for edge ", Xind, "-", Yind),
file=logFile, append=TRUE, sep="\n")
}
selectedInds <- selectBestResult(curResult$results_g, curResult$results_e, curResult$rho_t,
curResult$p.value_t, alpha, maxFC=maxFC, debug=debug, logFile=logFile)
if (debug) {
cat(paste0("done for edge ", Xind, "-", Yind),
file=logFile, append=TRUE, sep="\n")
}
nei_pCor_g_estimates[Xind,Yind] <- resultsList_g[[selectedInds[1]]]$estimate
nei_pCor_g_pvalues[Xind,Yind] <- resultsList_g[[selectedInds[1]]]$p.value
nei_pCor_g_ks[Xind,Yind] <- resultsList_g[[selectedInds[1]]]$k
nei_pCor_g_estimates[Yind,Xind] <- nei_pCor_g_estimates[Xind,Yind]
nei_pCor_g_pvalues[Yind,Xind] <- nei_pCor_g_pvalues[Xind,Yind]
nei_pCor_g_ks[Yind,Xind] <- nei_pCor_g_ks[Xind,Yind]
nei_pCor_e_estimates[Xind,Yind] <- resultsList_e[[selectedInds[2]]]$estimate
nei_pCor_e_pvalues[Xind,Yind] <- resultsList_e[[selectedInds[2]]]$p.value
nei_pCor_e_ks[Xind,Yind] <- resultsList_e[[selectedInds[2]]]$k
nei_pCor_e_estimates[Yind,Xind] <- nei_pCor_e_estimates[Xind,Yind]
nei_pCor_e_pvalues[Yind,Xind] <- nei_pCor_e_pvalues[Xind,Yind]
nei_pCor_e_ks[Yind,Xind] <- nei_pCor_e_ks[Xind,Yind]
nei_pCor_t_estimates[Xind,Yind] <- curResult$rho_t
nei_pCor_t_pvalues[Xind,Yind] <- curResult$p.value_t
nei_pCor_t_estimates[Yind,Xind] <- nei_pCor_t_estimates[Xind,Yind]
nei_pCor_t_pvalues[Yind,Xind] <- nei_pCor_t_pvalues[Xind,Yind]
} else {
if (debug) {
cat(paste0("Could not find file: ", resultFile),
file=logFile, append=TRUE, sep="\n")
}
}
}
return(list(pCor_g=list(estimates=nei_pCor_g_estimates, pvalues=nei_pCor_g_pvalues, k=nei_pCor_g_ks),
pCor_e=list(estimates=nei_pCor_e_estimates, pvalues=nei_pCor_e_pvalues, k=nei_pCor_e_ks),
pCor_t=list(estimates=nei_pCor_t_estimates, pvalues=nei_pCor_t_pvalues)))
}
#' @importFrom grDevices dev.off png
#' @importFrom graphics abline lines par plot
saveEdgeResultsPlot <- function(curResult, folderToSave, fileID, Xind, Yind, Sinds, alpha, maxFC,
debug=TRUE, logFile="./log.txt") {
if (length(curResult$results_g) == 0 || length(curResult$results_e) == 0) {
if (debug) {
cat(paste0("Results for edge ", Xind, "-", Yind, "_", paste(Sinds, collapse=""), " are empty!"),
file=logFile, append=TRUE, sep="\n")
}
return
}
selectedInds <- selectBestResult(curResult$results_g, curResult$results_e, curResult$rho_t,
curResult$p.value_t, alpha, maxFC=maxFC, debug=debug, logFile=logFile)
png(filename=paste0(folderToSave, fileID, "_edge_",Xind,"-",Yind, "_", paste(Sinds, collapse=""), ".png"),
width=1280, height=768)
par(mfcol=c(1,2), mar=c(5.1,4.1,7.1,2.1))
for (type in c("g", "e")) {
resultsList <- list()
if (type == "g") {
resultsList <- curResult$results_g
estSigma2_XS <- curResult$sigma2XS_g
estSigma2_YS <- curResult$sigma2YS_g
selectedIndex <- selectedInds[1]
} else if (type == "e") {
resultsList <- curResult$results_e
estSigma2_XS <- curResult$sigma2XS_e
estSigma2_YS <- curResult$sigma2YS_e
selectedIndex <- selectedInds[2]
}
if (length(resultsList) > 1) {
pvaluesVec <- as.numeric(unlist(lapply(resultsList, '[', "p.value")))
estimatesVec <- as.numeric(unlist(lapply(resultsList, '[', "estimate")))
sdxVec <- as.numeric(unlist(lapply(resultsList, '[', "sdx")))
sdyVec <- as.numeric(unlist(lapply(resultsList, '[', "sdy")))
FCysVec <- as.numeric(unlist(lapply(resultsList, '[', "FCys")))
FCxsVec <- as.numeric(unlist(lapply(resultsList, '[', "FCxs")))
if (debug) {
cat(paste0("Edge ", Xind, "-", Yind, type, "; est: ", round(estimatesVec[selectedIndex],4),
" p=", round(pvaluesVec[selectedIndex],4)),
file=logFile, append=TRUE, sep="\n")
}
plot(FCysVec + FCxsVec, -log10(pvaluesVec),
col="blue", type="l", xlim=c(0,2*maxFC),ylim=c(-10,15),
main=paste0("Edge ", Xind, "-", Yind, type,
"\nEstimated pCor_t: ", round(curResult$rho_t,4), " (P=", round(curResult$p.value_t,4), ")",
"\nEstimated sigma2_X|S: ", round(estSigma2_XS,4), ", Estimated sigma2_Y|S: ", round(estSigma2_YS,4),"\nEstimated pCor_", type, " (k=", resultsList[[selectedIndex]][[1]],"): ",
round(estimatesVec[selectedIndex],4), " (P=", round(pvaluesVec[selectedIndex],4),")"))
lines(FCysVec + FCxsVec, estimatesVec*10, col="red")
abline(v=(FCysVec + FCxsVec)[selectedIndex], col="green")
abline(h=0, col="gray")
abline(h=-log10(0.05), col="cyan")
} else {
if (debug) {
cat(paste0("Edge ", Xind, "-", Yind, type, "; est: ",
round(resultsList[[1]]$estimate,4), " p=", round(resultsList[[1]]$p.value,4)),
file=logFile, append=TRUE, sep="\n")
}
plot(sum(resultsList[[1]]$FCxs, resultsList[[1]]$FCys, na.rm=TRUE),
-log10(resultsList[[1]]$p.value),
col="blue", type="l", ylim=c(-10,15), xlim=c(0,2*maxFC),
main=paste0("Edge ", Xind, "-", Yind, type,
"\nEstimated pCor_t: ", round(curResult$rho_t,4), " (P=", round(curResult$p.value_t,4), ")",
"\nEstimated sigma2_X|S: ", round(estSigma2_XS,4), ", Estimated sigma2_Y|S: ", round(estSigma2_YS,4),"\nEstimated pCor_", type, " (k=", resultsList[[1]]$k,"): ",
round(resultsList[[1]]$estimate,4), " (P=", round(resultsList[[1]]$p.value,4),")"))
lines(sum(resultsList[[1]]$FCxs, resultsList[[1]]$FCys, na.rm=TRUE), resultsList[[1]]$estimate*10, col="red")
abline(h=0, col="gray")
abline(h=-log10(0.05), col="cyan")
}
}
dev.off()
}
selectBestResult <- function(resultsList_g, resultsList_e, rho_t, pvalue_t, alpha, maxFC=0.01,
debug=TRUE, logFile="./log.txt") {
if (length(resultsList_g) == 0 || length(resultsList_e) == 0) {
return(c(-1,-1))
}
if (debug) {
cat(paste0("Selecting best result..."),
file=logFile, append=TRUE, sep="\n")
}
FCxsVec_g <- as.numeric(unlist(lapply(resultsList_g, '[', "FCxs")))
FCysVec_g <- as.numeric(unlist(lapply(resultsList_g, '[', "FCys")))
validIds_g <- intersect(which(FCxsVec_g <= maxFC), which(FCysVec_g <= maxFC))
if (length(validIds_g) == 0 && length(FCxsVec_g) >= 1 && !is.nan(FCxsVec_g[1]))
validIds_g <- 1
FCxsVec_g <- FCxsVec_g[validIds_g]
FCysVec_g <- FCysVec_g[validIds_g]
pvaluesVec_g <- as.numeric(unlist(lapply(resultsList_g, '[', "p.value")))
if (length(validIds_g) > 0) {
pvaluesVec_g <- pvaluesVec_g[validIds_g]
}
estimatesVec_g <- as.numeric(unlist(lapply(resultsList_g, '[', "estimate")))
if (length(validIds_g) > 0) {
estimatesVec_g <- estimatesVec_g[validIds_g]
}
FCxsVec_e <- as.numeric(unlist(lapply(resultsList_e, '[', "FCxs")))
FCysVec_e <- as.numeric(unlist(lapply(resultsList_e, '[', "FCys")))
validIds_e <- intersect(which(FCxsVec_e <= maxFC), which(FCysVec_e <= maxFC))
if (length(validIds_e) == 0 && length(FCxsVec_e) >= 1 && !is.nan(FCxsVec_e[1]))
validIds_e <- 1
FCxsVec_e <- FCxsVec_e[validIds_e]
FCysVec_e <- FCysVec_e[validIds_e]
pvaluesVec_e <- as.numeric(unlist(lapply(resultsList_e, '[', "p.value")))
if (length(validIds_e) > 0) {
pvaluesVec_e <- pvaluesVec_e[validIds_e]
}
estimatesVec_e <- as.numeric(unlist(lapply(resultsList_e, '[', "estimate")))
if (length(validIds_e) > 0) {
estimatesVec_e <- estimatesVec_e[validIds_e]
}
n_ests_g <- length(estimatesVec_g)
signs_g <- table(sign(estimatesVec_g))
n_ests_e <- length(estimatesVec_e)
signs_e <- table(sign(estimatesVec_e))
selected_index_g = -1
selected_index_e = -1
if (pvalue_t > alpha) {
# If there is a zero total partial correlation
if ((!is.na(signs_g['-1']) && signs_g['-1'] >= sum(signs_g)/4 &&
!is.na(signs_e['1']) && signs_e['1'] >= sum(signs_e)/4) ||
(!is.na(signs_e['-1']) && signs_e['-1'] >= sum(signs_e)/4 &&
!is.na(signs_g['1']) && signs_g['1'] >= sum(signs_g)/4)) {
# If all g and e estimates have oppositve signs,
if (min(pvaluesVec_g) > alpha && min(pvaluesVec_e) > alpha) {
# Then or both are zero,
if (debug)
cat("Total is zero, with g and e zero.",
file=logFile, append=TRUE, sep="\n")
selected_index_g <- which(pvaluesVec_g == max(pvaluesVec_g))[1]
selected_index_e <- which(pvaluesVec_e == max(pvaluesVec_e))[1]
} else {
if (debug)
cat("Total is zero, and g and e have opposite signs.",
file=logFile, append=TRUE, sep="\n")
# or both effects are significant, but are cancelled out.
selected_index_g <- getStableEstimateIdx(FCxsVec_g, FCysVec_g, pvaluesVec_g, estimatesVec_g)
if (pvaluesVec_g[selected_index_g] > alpha) {
selected_index_g <- which(pvaluesVec_g == min(pvaluesVec_g))[1]
}
selected_index_e <- getStableEstimateIdx(FCxsVec_e, FCysVec_e, pvaluesVec_e, estimatesVec_e)
if (pvaluesVec_e[selected_index_e] > alpha) {
selected_index_e <- which(pvaluesVec_e == min(pvaluesVec_e))[1]
}
}
} else {
# then it is expected that both g and e estimates are zero
if ((length(pvaluesVec_g) == 1 && pvaluesVec_g[1] > alpha) ||
(intervalPercentage(FCxsVec_g, FCysVec_g, which(pvaluesVec_g > alpha)) > 0.3)) {
if (debug)
cat("Total is zero, g and e have same signal, and g is zero.",
file=logFile, append=TRUE, sep="\n")
selected_index_g <- which(pvaluesVec_g == max(pvaluesVec_g))[1]
} else {
if (debug)
cat("WARNING: Total is zero, and g is the most stable estimate.",
file=logFile, append=TRUE, sep="\n")
selected_index_g <- getStableEstimateIdx(FCxsVec_g, FCysVec_g, pvaluesVec_g, estimatesVec_g)
}
if ((length(pvaluesVec_e) == 1 && pvaluesVec_e[1] > alpha) ||
(intervalPercentage(FCxsVec_e, FCysVec_e, which(pvaluesVec_e > alpha)) > 0.3)) {
if (debug)
cat("Total is zero, g and e have same signal, and e is zero.",
file=logFile, append=TRUE, sep="\n")
selected_index_e <- which(pvaluesVec_e == max(pvaluesVec_e))[1]
} else {
if (debug)
cat("WARNING: Total is zero, and e is the most stable estimate.",
file=logFile, append=TRUE, sep="\n")
selected_index_e <- getStableEstimateIdx(FCxsVec_e, FCysVec_e, pvaluesVec_e, estimatesVec_e)
}
}
} else {
# If there is a non-zero total partial correlation
# Then g or e (or both) should be non-zero
if ((!is.na(signs_g['-1']) && signs_g['-1'] >= sum(signs_g)/5 &&
!is.na(signs_e['1']) && signs_e['1'] >= sum(signs_e)/5) ||
(!is.na(signs_e['-1']) && signs_e['-1'] >= sum(signs_e)/5 &&
!is.na(signs_g['1']) && signs_g['1'] >= sum(signs_g)/5)) {
# If all g and e estimates have oppositve signs,
if (debug)
cat("Total is nonzero, g and e have opposite signs.",
file=logFile, append=TRUE, sep="\n")
selected_index_g <- getStableEstimateIdx(FCxsVec_g, FCysVec_g, pvaluesVec_g, estimatesVec_g)
if (pvaluesVec_g[selected_index_g] > alpha) {
#selected_index_g <- which(pvaluesVec_g == min(pvaluesVec_g))[1]
finalIds <- ceiling(length(estimatesVec_g)/2):length(estimatesVec_g)
selected_index_g <- finalIds[which.min(pvaluesVec_g[finalIds])][1]
}
if (any(signs_g == length(estimatesVec_g)) &&
pvaluesVec_g[selected_index_g] > alpha) {
selected_index_g <- which.min(pvaluesVec_g)[1]
}
selected_index_e <- getStableEstimateIdx(FCxsVec_e, FCysVec_e, pvaluesVec_e, estimatesVec_e)
if (pvaluesVec_e[selected_index_e] > alpha) {
finalIds <- ceiling(length(estimatesVec_e)/2):length(estimatesVec_e)
selected_index_e <- finalIds[which.min(pvaluesVec_e[finalIds])][1]
}
if (any(signs_e == length(estimatesVec_e)) &&
pvaluesVec_e[selected_index_e] > alpha) {
selected_index_e <- which.min(pvaluesVec_e)[1]
}
} else {
if ((min(pvaluesVec_g) > alpha)) {
if (intervalPercentage(FCxsVec_g, FCysVec_g, which(abs(estimatesVec_g) > 0.1)) > 0.20) {
if (debug)
cat("Total is nonzero, g zero but estimates are moderate to high",
file=logFile, append=TRUE, sep="\n")
selected_index_g <- which(pvaluesVec_g == min(pvaluesVec_g))[1]
} else {
# g is zero: or all p-values are greater than alpha or
# the g estimates cross the zero and most of them have p-value greater than alpha
if (debug)
cat("Total is nonzero, but g is zero.",
file=logFile, append=TRUE, sep="\n")
selected_index_g <- which(pvaluesVec_g == max(pvaluesVec_g))[1]
}
}
if ((min(pvaluesVec_e) > alpha)) {
if (intervalPercentage(FCxsVec_e, FCysVec_e, which(abs(estimatesVec_e) > 0.1)) > 0.20) {
if (debug)
cat("Total is nonzero, e zero but estimates are moderate to high",
file=logFile, append=TRUE, sep="\n")
selected_index_e <- which(pvaluesVec_e == min(pvaluesVec_e))[1]
} else {
# e is zero: or all p-values are greater than alpha or
# the e estimates cross the zero and most of them have p-value greater than alpha
if (debug)
cat("Total is nonzero, but e is zero.",
file=logFile, append=TRUE, sep="\n")
selected_index_e <- which(pvaluesVec_e == max(pvaluesVec_e))[1]
}
}
if (selected_index_e == -1 || selected_index_g == -1) {
if (selected_index_e != -1 && selected_index_g == -1) {
# Since e is zero, g is probably significant.
# We will make the selction giving preference to the most stable one.
candidate_g <- getStableEstimateIdx(FCxsVec_g, FCysVec_g, pvaluesVec_g, estimatesVec_g)
if (pvaluesVec_g[candidate_g] <= alpha) {
if (debug)
cat("Total is nonzero, g is nonzero (the most stable estimate).",
file=logFile, append=TRUE, sep="\n")
selected_index_g <- candidate_g
} else {
if (debug)
cat("Total is nonzero, g may be nonzero (the first significant one).",
file=logFile, append=TRUE, sep="\n")
finalIds <- ceiling(length(estimatesVec_g)/2):length(estimatesVec_g)
selected_index_g <- finalIds[which.min(pvaluesVec_g[finalIds])][1]
}
} else if (selected_index_g != -1 && selected_index_e == -1) {
# Since g is zero, e is probably significant.
# We will make the selction giving preference to the most stable one.
candidate_e <- getStableEstimateIdx(FCxsVec_e, FCysVec_e, pvaluesVec_e, estimatesVec_e)
if (pvaluesVec_e[candidate_e] <= alpha) {
if (debug)
cat("Total is nonzero, e is nonzero (the most stable estimate).",
file=logFile, append=TRUE, sep="\n")
selected_index_e <- candidate_e
} else {
if (debug)
cat("Total is nonzero, e may be nonzero (the first significant one).",
file=logFile, append=TRUE, sep="\n")
finalIds <- ceiling(length(estimatesVec_e)/2):length(estimatesVec_e)
selected_index_e <- finalIds[which.min(pvaluesVec_e[finalIds])][1]
}
} else if (selected_index_g == -1 && selected_index_e == -1) {
# I can be more strict, because there is not a problematic coufounding,
# but I have to select at least one componente that is significant.
candidate_g <- getStableEstimateIdx(FCxsVec_g, FCysVec_g, pvaluesVec_g, estimatesVec_g)
candidate_e <- getStableEstimateIdx(FCxsVec_e, FCysVec_e, pvaluesVec_e, estimatesVec_e)
if (pvaluesVec_e[candidate_e] <= alpha ||
pvaluesVec_g[candidate_g] <= alpha) {
if (pvaluesVec_e[candidate_e] <= alpha) {
if (debug)
cat("Total is nonzero, e is nonzero (the most stable one).",
file=logFile, append=TRUE, sep="\n")
selected_index_e <- candidate_e
} else if (length(estimatesVec_e) > 1 &&
max(signs_e) == sum(signs_e) &&
length(which(abs(estimatesVec_e) > 0.05)) == length(estimatesVec_e)) {
selected_index_e <- which(pvaluesVec_e == min(pvaluesVec_e))[1]
} else {
if (debug)
cat("Total is nonzero, e is zero.",
file=logFile, append=TRUE, sep="\n")
selected_index_e <- which(pvaluesVec_e == max(pvaluesVec_e))[1]
}
if (pvaluesVec_g[candidate_g] <= alpha) {
if (debug)
cat("Total is nonzero, g is nonzero (the most stable one).",
file=logFile, append=TRUE, sep="\n")
selected_index_g <- candidate_g
} else if (length(estimatesVec_g) > 1 &&
max(signs_g) == sum(signs_g) &&
length(which(abs(estimatesVec_g) > 0.05)) == length(estimatesVec_g)) {
selected_index_g <- which(pvaluesVec_g == min(pvaluesVec_g))[1]
} else {
if (debug)
cat("Total is nonzero, g is zero.",
file=logFile, append=TRUE, sep="\n")
selected_index_g <- which(pvaluesVec_g == max(pvaluesVec_g))[1]
}
} else {
# Then g and e have significant effect for some k (they were not set as zero before)
# but they are not the most stable ones.
if ((intervalPercentage(FCxsVec_e, FCysVec_e,
which(pvaluesVec_e <= alpha)) > 0.20) ||
(intervalPercentage(FCxsVec_g, FCysVec_g,
which(pvaluesVec_g <= alpha)) > 0.20)) {
if (intervalPercentage(FCxsVec_e, FCysVec_e,
which(pvaluesVec_e <= alpha)) > 0.20) {
selected_index_e <- which(pvaluesVec_e == min(pvaluesVec_e))[1]
} else {
selected_index_e <- which(pvaluesVec_e == max(pvaluesVec_e))[1]
}
if (intervalPercentage(FCxsVec_g, FCysVec_g,
which(pvaluesVec_g <= alpha)) > 0.20) {
selected_index_g <- which(pvaluesVec_g == min(pvaluesVec_g))[1]
} else {
selected_index_g <- which(pvaluesVec_g == max(pvaluesVec_g))[1]
}
} else {
if (debug)
cat("Total is nonzero, g and e are nonzero (the first significant ones).",
file=logFile, append=TRUE, sep="\n")
selected_index_e <- which(pvaluesVec_e == min(pvaluesVec_e))[1]
selected_index_g <- which(pvaluesVec_g == min(pvaluesVec_g))[1]
}
}
}
}
}
}
if (selected_index_g == -1) {
if (debug) {
cat("WARNING: unexpected case for g!",
file=logFile, append=TRUE, sep="\n")
}
selected_index_g <- getStableEstimateIdx(FCxsVec_g, FCysVec_g, pvaluesVec_g, estimatesVec_g)
}
if (selected_index_e == -1) {
if (debug) {
cat("WARNING: unexpected case for e!",
file=logFile, append=TRUE, sep="\n")
}
selected_index_e <- getStableEstimateIdx(FCxsVec_e, FCysVec_e, pvaluesVec_e, estimatesVec_e)
}
return(c(selected_index_g, selected_index_e))
}
intervalPercentage <- function(FCxsVec, FCysVec, indices) {
if (length(indices) == 0)
return(0)
sumFCs <- FCxsVec + FCysVec
sequences <- c()
newseq = TRUE
for (i in 1:length(indices)) {
if (newseq) {
if (indices[i] > 1)
sequences <- c(sequences, (sumFCs[indices[i]]+sumFCs[indices[i]-1])/2)
else
sequences <- c(sequences, sumFCs[indices[i]])
newseq = FALSE
}
if ((i < length(indices) && indices[i+1] != indices[i]+1) ||
(i == length(indices))) {
if (indices[i] < length(FCxsVec))
sequences <- c(sequences, (sumFCs[indices[i]]+sumFCs[indices[i]+1])/2)
else
sequences <- c(sequences, sumFCs[indices[i]])
newseq = TRUE
}
}
totalInterval <- 0
i = 2
while (i <= length(sequences)) {
totalInterval <- totalInterval + (sequences[i] - sequences[i-1])
i = i+2
}
sumFCsInterval <- sumFCs[length(sumFCs)] - sumFCs[1]
if (sumFCsInterval == 0)
return(0)
return(totalInterval/sumFCsInterval)
}
# both lists must be sorted!
getNClosestPoints <- function(pointList, allPoints) {
closestPoints <- c()
i = 1
for (valueId in 1:length(allPoints)) {
if (i > length(pointList))
break
if (allPoints[valueId] == pointList[i]) {
closestPoints <- c(closestPoints, allPoints[valueId])
i = i+1
} else if (allPoints[valueId] > pointList[i]) {
if (abs(allPoints[valueId] - pointList[i]) < abs(allPoints[valueId-1] - pointList[i])) {
closestPoints <- c(closestPoints, allPoints[valueId])
} else {
closestPoints <- c(closestPoints, allPoints[valueId-1])
}
i = i+1
}
}
closestPoints <- c(closestPoints, allPoints[valueId])
return(closestPoints)
}
#' @importFrom stats var
getStableEstimateIdx <- function(FCxsVec, FCysVec, pvaluesVec, estimatesVec) {
if (length(estimatesVec) == 1) {
return(1)
}
limiters <- seq((FCysVec + FCxsVec)[1], (FCysVec + FCxsVec)[length(FCysVec + FCxsVec)], length.out=6)
limiters <- getNClosestPoints(limiters, (FCysVec + FCxsVec))
limitersIds <- which((FCysVec + FCxsVec) %in% limiters)
varsVec <- c()
for (i in 1:(length(limitersIds)-1)) {
varsVec <- c(varsVec, var(estimatesVec[limitersIds[i]:limitersIds[i+1]]))
}
stableIds <- which(round(varsVec,4) <= round(summary(varsVec)[4],4))
finalStableIds <- c()
for (id in stableIds) {
finalStableIds <- c(finalStableIds, limitersIds[id]:limitersIds[id+1])
}
finalStableIds <- unique(finalStableIds)
selected_index <- which(pvaluesVec[finalStableIds] == min(pvaluesVec[finalStableIds]))[1]
selected_index <- finalStableIds[selected_index]
return(selected_index)
}
#' @importFrom stats p.adjust
adjustSymmetricPValuesMatrix <- function(pvaluesMatrix, correction="fdr") {
lowind <- which(lower.tri(pvaluesMatrix), arr.ind=TRUE)
uppind <- which(upper.tri(pvaluesMatrix), arr.ind=TRUE)
adjp <- p.adjust(pvaluesMatrix[lowind], method=correction)
pvaluesMatrix[lowind] <- adjp
pvaluesMatrix[uppind] <- adjp
return(pvaluesMatrix)
}
getAdjMatrixFromSymmetricPCorPvalues <- function(pvaluesMatrix, alpha=0.05, correction=NULL) {
nVertices <- dim(pvaluesMatrix)[1]
if (!is.null(correction)) {
pvaluesMatrix <- adjustSymmetricPValuesMatrix(pvaluesMatrix, correction)
}
edges <- which(pvaluesMatrix <= alpha, arr.ind=TRUE)
adjMatrix <- matrix(0, nVertices, nVertices)
adjMatrix[edges] <- 1
colnames(adjMatrix) <- colnames(pvaluesMatrix)
row.names(adjMatrix) <- colnames(pvaluesMatrix)
return(list(adjacency.matrix=adjMatrix, adjusted.pvalues=pvaluesMatrix))
}
getAdjMatrixFromSymmetricPCor <- function(pCor, alpha, correction="fdr") {
pvaluesMatrix <- pCor$pvalues
diag(pvaluesMatrix) <- 1
return(getAdjMatrixFromSymmetricPCorPvalues(pvaluesMatrix, alpha=0.05, correction))
}
#' @importFrom kinship2 kinship
getKinshipList <- function(pedigrees, sampled=NULL, debug=TRUE, logFile=NULL) {
if (!is.null(sampled) && length(sampled) != dim(pedigrees)[1]) {
cat("Sampled vector size and number of rows of pedigrees data frame must be equal",
file="log_error.log", append=TRUE, sep="\n")
return(NULL)
}
if (!is.null(sampled))
pedigrees <- cbind(pedigrees, sampled=sampled)
famids = unique(pedigrees$famid)
Phi_list <- list()
if (!is.null(sampled))
sampled_Phi_list <- list()
for (famid in famids) {
ped <- pedigrees[pedigrees$famid == famid,]
Phi_list[[famid]] <- kinship(id=ped$id, dadid=ped$dadid, momid=ped$momid, sex=ped$sex)
if (!is.null(sampled)) {
sampled_ids <- which(ped$sampled == 1)
sampled_Phi_list[[famid]] <- Phi_list[[famid]][sampled_ids, sampled_ids]
}
}
if (!is.null(sampled))
return(list(Phi_list=Phi_list, sampled_Phi_list=sampled_Phi_list))
else
return(Phi_list)
}
matrixSqrt <- function(m, inverse=FALSE, tol = sqrt(.Machine$double.eps)) {
eigDecomp <- eigen(m, symmetric=TRUE)
Positive <- eigDecomp$values > max(tol * eigDecomp$values[1], 0)
D = NULL
if (inverse) {
D = diag(1/sqrt(eigDecomp$values[Positive]))
} else {
D = diag(sqrt(eigDecomp$values[Positive]))
}
sqrtM <- eigDecomp$vectors[, Positive, drop = FALSE] %*% D %*% t(eigDecomp$vectors[, Positive, drop = FALSE])
return(sqrtM)
}
#' @importFrom utils combn
#' @importFrom parallel detectCores
#' @import foreach
#' @import doMC
preprocessFamilyBasedUDGs <- function(phen.df, covs.df, pedigrees, sampled, fileID, dirToSave, max_cores=NULL,
minK=10, maxFC = 0.01, orthogonal=TRUE, useGPU=FALSE,
debug=TRUE, logFile=NULL) {
if (debug && is.null(logFile)){
logFile = paste0(dirToSave, "preprocessFamilyBasedUDGs_", format(Sys.time(), "%Y%m%d-%Hh%Mm%Ss"), ".log")
}
pseq <- 1:ncol(phen.df)
N <- dim(phen.df)[1]
xyinds <- combn(pseq,2)
Phi2 <- calculateBlockDiagonal2PhiMatrix(pedigrees, FALSE, sampled=sampled)
Z <- calculateBlockDiagonal2PhiMatrix(pedigrees, TRUE, sampled=sampled)
sampledPed <- pedigrees
if(!is.null(sampled)) {
sampledPed <- sampledPed[which(sampled==1),]
}
joblabels <- c("x", "y", "S")
jobqueue <- matrix(NA, 0,length(joblabels))
colnames(jobqueue) <- joblabels
for (xyind in 1:dim(xyinds)[2]) {
Xind <- xyinds[1,xyind] # Xind is always less than Yind
Yind <- xyinds[2,xyind]
Sinds <- pseq[-c(Xind, Yind)]
jobqueue <- rbind(jobqueue, c(Xind,Yind,paste(Sinds, collapse=" ")))
}
if (dim(jobqueue)[1] > 0) {
jobqueue = as.data.frame(jobqueue, stringsAsFactors=FALSE)
jobqueue$x <- as.numeric(jobqueue$x)
jobqueue$y <- as.numeric(jobqueue$y)
fitqueue = as.data.frame(rbind(cbind(rownames(jobqueue), jobqueue[,"x"], jobqueue[,"S"]), cbind(rownames(jobqueue), jobqueue[,"y"], jobqueue[,"S"])), stringsAsFactors=FALSE)
fitqueue[,1] <- as.numeric(fitqueue[,1])
fitqueue[,2] <- as.numeric(fitqueue[,2])
fitqueue = fitqueue[order(fitqueue[,1]),]
notDuplJobs <- !duplicated(fitqueue[,c(2,3)])
xNotDuplJobs <- notDuplJobs[which(1:length(notDuplJobs) %% 2 == 1)]
yNotDuplJobs <- notDuplJobs[which(1:length(notDuplJobs) %% 2 == 0)]
if (is.null(max_cores)) {
max_cores <- detectCores()
}
registerDoMC(max_cores)
batches <- list()
i = 1
notJobs <- intersect(which(xNotDuplJobs), which(yNotDuplJobs))
notBatches <- split(notJobs, ceiling(seq_along(notJobs)/max_cores))
for (batch in 1:length(notBatches)) {
batches[i] <- notBatches[batch]
i = i + 1
}
xorJobs <- sort(union(intersect(which(xNotDuplJobs == FALSE), which(yNotDuplJobs)), intersect(which(yNotDuplJobs == FALSE), which(xNotDuplJobs))))
if (length(xorJobs) > 0) {
xorBatches <- split(xorJobs, ceiling(seq_along(xorJobs)/max_cores))
for (batch in 1:length(xorBatches)) {
batches[i] <- xorBatches[batch]
i = i + 1
}
}
trueJobs <- intersect(which(xNotDuplJobs == FALSE), which(yNotDuplJobs == FALSE))
if (length(trueJobs) > 0) {
trueBatches <- split(trueJobs, ceiling(seq_along(trueJobs)/max_cores))
for (batch in 1:length(trueBatches)) {
batches[i] <- trueBatches[batch]
i = i + 1
}
}
# process in parallel all jobs in jobqueue and append the output in results.
for (batchId in 1:length(batches)) {
jobi = 0 # a hack to avoid problems when checking --as-cran
foreach(jobi = batches[[batchId]]) %dopar% {
x <- as.numeric(jobqueue[jobi, "x"])
y <- as.numeric(jobqueue[jobi, "y"])
S <- as.numeric(unlist(strsplit(as.character(jobqueue[jobi, "S"]), " ")))
if (debug) {
cat(paste0(Sys.getpid(),
" - Checking computation for ", x, ".", y, "|{",paste(S, collapse=","), "}!"),
file=logFile, append=TRUE, sep="\n")
}
processFile <- TRUE
fileToSave <- paste0(dirToSave, fileID, "_", x, "_", y, "_", paste(S, collapse=""), ".rds")
if (file.exists(fileToSave)) {
if (debug) {
cat(paste0(Sys.getpid(),
" - file already exists for ", x, ".", y, "|{",paste(S, collapse=","), "}..."),
file=logFile, append=TRUE, sep="\n")
}
curResult <- readRDS(fileToSave)
resultsList_g <- curResult$results_g
resultsList_e <- curResult$results_e
ret_g <- checkUnconfoundedEstimatesList(resultsList_g, maxFC, N)
ret_e <- checkUnconfoundedEstimatesList(resultsList_e, maxFC, N)
if (ret_g == "error" || ret_e == "error") {
if (debug) {
cat(paste0("Problem in UnconfoundedEstimatesList - removing file: ", fileToSave, "\n"),
file=logFile, append=TRUE, sep="\n")
cat(paste0(Sys.getpid(),
" - redoing computation for ", x, ".", y, "|{",paste(S, collapse=","), "}..."),
file=logFile, append=TRUE, sep="\n")
}
file.remove(fileToSave)
} else if (ret_g == "done" && ret_e == "done") {
if (debug) {
cat(paste0(Sys.getpid(),
" - computation for ", x, ".", y, "|{",paste(S, collapse=","), "} is done!"),
file=logFile, append=TRUE, sep="\n")
}
processFile <- FALSE
} else {
if (debug) {
cat(paste0(Sys.getpid(),
" - updating computation for ", x, ".", y, "|{",paste(S, collapse=","), "}..."),
file=logFile, append=TRUE, sep="\n")
}
}
}
if (processFile) {
if (debug) {
cat(paste0(Sys.getpid(),
" - doing computation for ", x, ".", y, "|{",paste(S, collapse=","), "}..."),
file=logFile, append=TRUE, sep="\n")
}
curResult <- getFamilyBasedPCorResults(x, y, S, phen.df, covs.df, sampledPed, Phi2, Z, minK,
maxFC, orthogonal, fileID, dirToSave, useGPU, debug, logFile)
if (!is.null(fileID) && !is.null(dirToSave)) {
saveRDS(curResult, file=fileToSave)
}
}
}
}
}
}
#' @importFrom utils combn write.table
#' @import foreach
preprocessFamilyBasedDAGs <- function(phen.df, covs.df, pedigrees, sampled, alpha,
fileID, dirToSave, preprocessedPvaluesCSVFile, max_cores=NULL, minK=10, maxFC = 0.01,
orthogonal=TRUE, savePlots=FALSE, useGPU=FALSE, debug=TRUE, logFile=NULL) {
if (debug) {
cat("Preprocessing pvalues for PC algorithm...",
file=logFile, append=TRUE, sep="\n")
}
Phi2 <- calculateBlockDiagonal2PhiMatrix(pedigrees, FALSE, sampled=sampled)
Z <- calculateBlockDiagonal2PhiMatrix(pedigrees, TRUE, sampled=sampled)
sampledPed <- pedigrees
if(!is.null(sampled)) {
sampledPed <- sampledPed[which(sampled==1),]
}
csvFileName <- preprocessedPvaluesCSVFile
# preprocessing using parallel computing
p <- dim(phen.df)[2]
pseq <- 1:p
skel_t <- matrix(TRUE, p, p)
diag(skel_t) <- FALSE
skel_g <- skel_t
skel_e <- skel_t
labels <- c("x", "y", "S","pcort", "pcort_p.value",
"pcorg", "pcorg_p.value",
"pcore", "pcore_p.value",
"se", "sg")
if (!file.exists(csvFileName)) {
write.table(as.matrix(t(labels)), file = csvFileName, sep = ";", row.names=FALSE, col.names = FALSE, append=FALSE, quote=FALSE)
}
orders <- 0:(p-2)
for (iorder in orders) {
for (pairType in c("upper", "lower")) {
allpairs_g <- which(skel_g == TRUE, arr.ind=TRUE)
allpairs_e <- which(skel_e == TRUE, arr.ind=TRUE)
allpairs_t <- which(skel_t == TRUE, arr.ind=TRUE)
allpairs <- unique(rbind(allpairs_g, allpairs_e, allpairs_t))
remainingEdges <- dim(allpairs)[1]
if (debug) {
cat("Remaining", remainingEdges, "edges",
file=logFile, append=TRUE, sep="\n")
}
if (remainingEdges > 0) {
if(pairType == "upper") {
pairs <- allpairs[which(allpairs[,1] < allpairs[,2]),,drop=FALSE]
pairs <- pairs[order(pairs[,1]),,drop=FALSE]
} else {
pairs <- allpairs[which(allpairs[,1] > allpairs[,2]),,drop=FALSE]
pairs <- pairs[order(pairs[,1]),,drop=FALSE]
}
joblabels <- c("x", "y", "S", "dagt", "dagg", "dage")
jobqueue <- matrix(NA, 0,length(joblabels))
colnames(jobqueue) <- joblabels
results <- c()
for (pairind in 1:dim(pairs)[1]) {
pair <- pairs[pairind,]
x <- as.integer(pair[1])
y <- as.integer(pair[2])
others <- pseq[-pair]
dagg=sum(duplicated(rbind(allpairs_g, pair)))
dage=sum(duplicated(rbind(allpairs_e, pair)))
dagt=sum(duplicated(rbind(allpairs_t, pair)))
if (iorder > 0 && iorder < (p-2)) {
if (debug) {
cat(paste0("Processing iorder ", iorder),
file=logFile, append=TRUE, sep="\n")
}
Ssets <- combn(others, iorder)
for(Sind in 1:dim(Ssets)[2]) {
S = Ssets[,Sind]
pcorinfo <- getPCorInfo(csvFileName,x,y,S,dagg,dage,dagt, debug=debug);
iscompl <- isPcorInfoComplete(pcorinfo, dagt, dagg, dage);
if (length(pcorinfo) > 0 && sum(iscompl) == 3) {
results <- rbind(results, pcorinfo)
} else {
jobqueue <- rbind(jobqueue, c(x,y,paste(S, collapse=" "), !iscompl))
}
}
} else if(iorder == 0) {
S = c()
if (debug) {
cat(paste0("Processing iorder ", iorder),
file=logFile, append=TRUE, sep="\n")
cat(paste0("Checking ", x, ".", y, "|{",paste(S, collapse=","), "}..."),
file=logFile, append=TRUE, sep="\n")
}
pcorinfo <- getPCorInfo(csvFileName,x,y,S,dagg,dage,dagt, debug=debug);
iscompl <- isPcorInfoComplete(pcorinfo, dagt, dagg, dage);
if (length(pcorinfo) > 0 && sum(iscompl) == 3) {
results <- rbind(results, pcorinfo)
} else {
jobqueue <- rbind(jobqueue, c(x,y,"", !iscompl))
}
} else if (iorder == (p-2)) {
if (debug) {
cat(paste0("Processing iorder ", iorder),
file=logFile, append=TRUE, sep="\n")
}
S = others
pcorinfo <- getPCorInfo(csvFileName,x,y,S,dagg,dage,dagt, debug=debug);
iscompl <- isPcorInfoComplete(pcorinfo, dagt, dagg, dage);
if (length(pcorinfo) > 0 && sum(iscompl) == 3) {
results <- rbind(results, pcorinfo)
} else {
jobqueue <- rbind(jobqueue, c(x,y,paste(S, collapse=" "), !iscompl))
}
}
}
if (dim(jobqueue)[1] > 0) {
jobqueue = as.data.frame(jobqueue, stringsAsFactors=FALSE)
jobqueue$x <- as.numeric(jobqueue$x)
jobqueue$y <- as.numeric(jobqueue$y)
fitqueue = as.data.frame(rbind(cbind(rownames(jobqueue), jobqueue[,"x"], jobqueue[,"S"]), cbind(rownames(jobqueue), jobqueue[,"y"], jobqueue[,"S"])), stringsAsFactors=FALSE)
fitqueue[,1] <- as.numeric(fitqueue[,1])
fitqueue[,2] <- as.numeric(fitqueue[,2])
fitqueue = fitqueue[order(fitqueue[,1]),]
notDuplJobs <- !duplicated(fitqueue[,c(2,3)])
xNotDuplJobs <- notDuplJobs[which(1:length(notDuplJobs) %% 2 == 1)]
yNotDuplJobs <- notDuplJobs[which(1:length(notDuplJobs) %% 2 == 0)]
if (is.null(max_cores)) {
max_cores <- detectCores()
}
registerDoMC(max_cores)
batches <- list()
i = 1
notJobs <- intersect(which(xNotDuplJobs), which(yNotDuplJobs))
notBatches <- split(notJobs, ceiling(seq_along(notJobs)/max_cores))
for (batch in 1:length(notBatches)) {
batches[i] <- notBatches[batch]
i = i + 1
}
xorJobs <- sort(union(intersect(which(xNotDuplJobs == FALSE), which(yNotDuplJobs)), intersect(which(yNotDuplJobs == FALSE), which(xNotDuplJobs))))
if (length(xorJobs) > 0) {
xorBatches <- split(xorJobs, ceiling(seq_along(xorJobs)/max_cores))
for (batch in 1:length(xorBatches)) {
batches[i] <- xorBatches[batch]
i = i + 1
}
}
trueJobs <- intersect(which(xNotDuplJobs == FALSE), which(yNotDuplJobs == FALSE))
if (length(trueJobs) > 0) {
trueBatches <- split(trueJobs, ceiling(seq_along(trueJobs)/max_cores))
for (batch in 1:length(trueBatches)) {
batches[i] <- trueBatches[batch]
i = i + 1
}
}
# process in parallel all jobs in jobqueue and append the output in results.
for (batchId in 1:length(batches)) {
curResults <- c()
jobi = 0 # a hack to avoid problems when checking --as-cran
curResults <- foreach(jobi = batches[[batchId]],
.combine='rbind') %dopar% {
#for(jobi in batches[[batchId]]) {
x <- as.numeric(jobqueue[jobi, "x"])
y <- as.numeric(jobqueue[jobi, "y"])
S <- as.numeric(unlist(strsplit(as.character(jobqueue[jobi, "S"]), " ")))
dagt <- as.logical(jobqueue[jobi, "dagt"])
dagg <- as.logical(jobqueue[jobi, "dagg"])
dage <-as.logical(jobqueue[jobi, "dage"])
suffStat <- list()
suffStat$preprocessedPvaluesCSVFile <- csvFileName
suffStat$savePlots <- savePlots
suffStat$dagt <- as.logical(dagt)
suffStat$dagg <- as.logical(dagg)
suffStat$dage <- as.logical(dage)
if (debug) {
cat(paste0(Sys.getpid(), " - doing computation for ", x, ".", y, "|{",paste(S, collapse=","), "}..."),
file=logFile, append=TRUE, sep="\n")
}
ret <- familyBasedPCorTest(x, y, S, phen.df, covs.df, sampledPed, Phi2, Z,
minK=minK, maxFC=maxFC, orthogonal=orthogonal, alpha=alpha,
dirToSave, fileID, suffStat$savePlots, useGPU=useGPU, debug=debug, logFile=logFile)
if (debug) {
cat(paste0(Sys.getpid(), " - computation for ", x, ".", y, "|{",paste(S, collapse=","), "} is done:\n",
paste(ret, collapse=";")),
file=logFile, append=TRUE, sep="\n")
}
currow <- c(x=x,y=y,S=paste(S, collapse=" "), getRetVec(ret))
currow
}
if (!is.null(curResults)) {
if (length(curResults) == 11) {
# converting the vector to a matrix
curResultsColNames <- names(curResults)
curResults <- matrix(curResults, ncol=11)
colnames(curResults) <- curResultsColNames
}
updatePreprocessedPvaluesTable(curResults, csvFileName)
results <- rbind(results, curResults)
}
}
}
if (is.null(results)) {
if (length(results) > 0)
colnames(results) <- labels
# updating skel matrices
rowids_g <- as.numeric(which(as.numeric(results[,"pcorg_p.value"]) > alpha))
skel_g[cbind(as.numeric(results[rowids_g, "x"]), as.numeric(results[rowids_g, "y"]))] <- FALSE
skel_g[lower.tri(skel_g)] <- (t(skel_g))[lower.tri(skel_g)]
rowids_e <- as.numeric(which(as.numeric(results[,"pcore_p.value"]) > alpha))
skel_e[cbind(as.numeric(results[rowids_e, "x"]), as.numeric(results[rowids_e, "y"]))] <- FALSE
skel_e[lower.tri(skel_e)] <- (t(skel_e))[lower.tri(skel_e)]
rowids_t <- as.numeric(which(as.numeric(results[,"pcort_p.value"]) > alpha))
skel_t[cbind(as.numeric(results[rowids_t, "x"]), as.numeric(results[rowids_t, "y"]))] <- FALSE
skel_t[lower.tri(skel_t)] <- (t(skel_t))[lower.tri(skel_t)]
}
}
}
}
}
# Function to compute the partial correlation coefficient
# It must be independent of the PC algorithm or any other approach that uses suffStat.
# pedigrees must be sampled
familyBasedPCorTest <- function(x, y, S, phen.df, covs.df, pedigrees, Phi2, Z,
minK=10, maxFC = 0.01, orthogonal=TRUE, alpha=0.05,
dirToSave=NULL, fileID=NULL, savePlots=FALSE, useGPU=FALSE,
debug=TRUE, logFile="./log.txt") {
if (debug) {
cat(paste0("Running familyBasedPCorTest for ", fileID, "; useGPU=", useGPU),
file=logFile, append=TRUE, sep="\n")
}
fileToSave <- NULL
if (!is.null(fileID) && !is.null(dirToSave)) {
fileToSave <- paste0(dirToSave, fileID, "_", x, "_", y, "_", paste(S, collapse=""), ".rds")
if (x > y) {
fileToSave <- paste0(dirToSave, fileID, "_", y, "_", x, "_", paste(S, collapse=""), ".rds")
}
}
# NOTE: We have to compute this every time to check if results are complete.
curResult <- getFamilyBasedPCorResults(x, y, S, phen.df, covs.df, pedigrees, Phi2, Z, minK, maxFC,
orthogonal, fileID, dirToSave, useGPU, debug, logFile)
results <- selectFamilyBasedPCor(curResult, alpha, maxFC, curResultFileName=fileToSave,
debug=debug, logFile=logFile)
if (savePlots && !is.null(fileID) && !is.null(dirToSave)) {
if (debug) {
cat("Generating result plot...",
file=logFile, append=TRUE, sep="\n")
}
saveEdgeResultsPlot(curResult, dirToSave, fileID, x, y, S, alpha, maxFC, debug=debug, logFile=logFile)
}
return(results)
}
# curResult is the output of the getFamilyBasedPCorResults function
selectFamilyBasedPCor <- function(curResult, alpha, maxFC, curResultFileName="", debug=TRUE, logFile="./log.txt") {
if (debug) {
cat(paste0("Selecting from results in ", curResultFileName),
file=logFile, append=TRUE, sep="\n")
}
resultsList_g <- curResult$results_g
resultsList_e <- curResult$results_e
selectedInds <- selectBestResult(curResult$results_g, curResult$results_e,
curResult$rho_t, curResult$p.value_t, alpha, maxFC, debug=debug, logFile=logFile)
pcorg <- resultsList_g[[selectedInds[1]]]$estimate
pcorg_p.value <- resultsList_g[[selectedInds[1]]]$p.value
sg <- resultsList_g[[selectedInds[1]]]$k
pcore <- resultsList_e[[selectedInds[2]]]$estimate
pcore_p.value <- resultsList_e[[selectedInds[2]]]$p.value
se <- resultsList_e[[selectedInds[2]]]$k
ret <- list(pcort=as.numeric(curResult$rho_t), pcort_p.value=curResult$p.value_t,
pcorg=as.numeric(pcorg), pcorg_p.value=pcorg_p.value,
pcore=as.numeric(pcore), pcore_p.value=pcore_p.value,
se=se, sg=sg)
return(unlist(ret))
}
isPcorInfoComplete <- function(pcorinfo, dagt, dagg, dage) {
isDAGInfoComplete <- function(dag, pvalue) {
if (!dag || (dag && !is.na(as.numeric(pvalue)))) {
return(TRUE)
} else {
return(FALSE)
}
}
return(c(
isDAGInfoComplete(dagt, pcorinfo["pcort_p.value"]),
isDAGInfoComplete(dagg, pcorinfo["pcorg_p.value"]),
isDAGInfoComplete(dage, pcorinfo["pcore_p.value"])))
}
#' @importFrom utils read.table write.table
getPCorInfo <- function(csvfile, x, y, S, dagg, dage, dagt,
debug=TRUE, logFile="./log.txt") {
retvalue = FALSE;
if (file.exists(csvfile)) {
if (length(S) == 0)
S <- ""
preprocessedPvalues <- read.table(file=csvfile, sep=";", header=TRUE)
narows <- which(is.na(preprocessedPvalues$S))
if (length(narows) > 0)
preprocessedPvalues[narows,"S"] <- ""
Svalue <- paste(S, collapse=" ")
xvalues <- which(preprocessedPvalues[,"x"] == x)
yvalues <- which(preprocessedPvalues[,"y"] == y)
Svalues <- which(preprocessedPvalues[,"S"] == Svalue)
ind <- intersect(intersect(xvalues,yvalues), Svalues)
if (length(ind) > 1) {
if (debug) {
cat(paste0("WARNING in pcorExists: repeated entries for ", x, ".", y, "|{",paste(S, collapse=","), "}"),
file=logFile, append=TRUE, sep="\n")
}
maxrow <- apply(preprocessedPvalues[ind,], 2, FUN=function(x){max(x,na.rm=TRUE)})
preprocessedPvalues[ind[1],] <- maxrow
preprocessedPvalues <- preprocessedPvalues[-ind[-1],]
write.table(preprocessedPvalues, file = csvfile, sep = ";",
row.names=FALSE, col.names = TRUE, append=FALSE, quote=FALSE)
ind = ind[1]
}
return(preprocessedPvalues[ind,])
} else {
if (debug) {
cat("WARNING!!! Empty return",
file=logFile, append=TRUE, sep="\n")
}
return(c())
}
}
getRetVec <- function(familyBasedPCorTestRet) {
return(c(
familyBasedPCorTestRet["pcort"],
familyBasedPCorTestRet["pcort_p.value"],
familyBasedPCorTestRet["pcorg"],
familyBasedPCorTestRet["pcorg_p.value"],
familyBasedPCorTestRet["pcore"],
familyBasedPCorTestRet["pcore_p.value"],
familyBasedPCorTestRet["se"],
familyBasedPCorTestRet["sg"]))
}
#' @importFrom utils write.table
updatePreprocessedPvaluesTable <- function(curResults, csvFileName) {
if (length(curResults) > 0 && nrow(curResults) > 0) {
labels <- c("x", "y", "S","pcort", "pcort_p.value",
"pcorg", "pcorg_p.value",
"pcore", "pcore_p.value",
"se", "sg")
if (!file.exists(csvFileName)) {
write.table(as.matrix(t(labels)), file = csvFileName, sep = ";", row.names=FALSE, col.names = FALSE, append=FALSE, quote=FALSE)
}
write.table(curResults, file = csvFileName, sep = ";", row.names=FALSE,
col.names = FALSE, append=TRUE, quote=FALSE)
}
}
#' @importFrom utils write.table
#' @importFrom pcalg pc rfci
generateFamilyDAG <- function(phen.df, covs.df, pedigrees, sampled=NULL,
preprocessedPvaluesCSVFile, type, alpha,
hidden_vars = FALSE, maj.rule=FALSE,
minK=10, maxFC = 0.01, orthogonal=TRUE,
dirToSave, fileID, savePlots, useGPU=FALSE, debug=TRUE, logFile="./log.txt") {
labels <- c("x", "y", "S","pcort", "pcort_p.value",
"pcorg", "pcorg_p.value",
"pcore", "pcore_p.value",
"se", "sg")
# NOTE: cannot run this in parallel, since problems occur
# when the csvfile is updated by multiple processes.
dagg <- dage <- dagt <- FALSE
usedPvaluesCSVFile_g <- usedPvaluesCSVFile_e <- usedPvaluesCSVFile_t <- NULL
if (type == "g") {
dagg <- TRUE
usedPvaluesCSVFile_g <- paste0(dirToSave, fileID, "_usedPvalues_g.csv")
write.table(as.matrix(t(labels)), file = usedPvaluesCSVFile_g, sep = ";", row.names=FALSE, col.names = FALSE, append=FALSE, quote=FALSE)
} else if (type == "e") {
dage <- TRUE
usedPvaluesCSVFile_e <- paste0(dirToSave, fileID, "_usedPvalues_e.csv")
write.table(as.matrix(t(labels)), file = usedPvaluesCSVFile_e, sep = ";", row.names=FALSE, col.names = FALSE, append=FALSE, quote=FALSE)
} else {
dagt <- TRUE
usedPvaluesCSVFile_t <- paste0(dirToSave, fileID, "_usedPvalues_t.csv")
write.table(as.matrix(t(labels)), file = usedPvaluesCSVFile_t, sep = ";", row.names=FALSE, col.names = FALSE, append=FALSE, quote=FALSE)
}
if (debug) {
cat(paste0("Generating DAG_", type, " for ", fileID, "; useGPU=", useGPU),
file=logFile, append=TRUE, sep="\n")
}
Phi2 <- calculateBlockDiagonal2PhiMatrix(pedigrees, FALSE, sampled=sampled)
Z <- calculateBlockDiagonal2PhiMatrix(pedigrees, TRUE, sampled=sampled)
sampledPed <- pedigrees
if(!is.null(sampled)) {
sampledPed <- sampledPed[which(sampled==1),]
}
suffStat <- list(
phen.df=phen.df, covs.df=covs.df, pedigrees=sampledPed,
minK=minK, maxFC=maxFC, orthogonal=orthogonal,
alpha=alpha,
Phi2=Phi2, Z=Z,
dagg=dagg, dage=dage, dagt=dagt,
maj.rule = maj.rule,
preprocessedPvaluesCSVFile=preprocessedPvaluesCSVFile,
usedPvaluesCSVFile_g=usedPvaluesCSVFile_g,
usedPvaluesCSVFile_e=usedPvaluesCSVFile_e,
usedPvaluesCSVFile_t=usedPvaluesCSVFile_t,
savePlots=savePlots,
dirToSave = dirToSave, fileID = fileID, useGPU=useGPU)
if (hidden_vars == FALSE) {
if (suffStat$maj.rule == FALSE) {
if (debug) {
cat("\nRunning conservative PC algorithm...",
file=logFile, append=TRUE, sep="\n")
}
dag <- pc(suffStat=suffStat, indepTest = familyBasedCITest,
alpha=alpha, labels = paste0(colnames(phen.df), "_", type), verbose = TRUE,
skel.method="stable", solve.confl=TRUE, conservative=TRUE, u2pd="relaxed")
} else {
if (debug) {
cat("\nRunning PC algorithm with majority rule ...",
file=logFile, append=TRUE, sep="\n")
}
dag <- pc(suffStat=suffStat, indepTest = familyBasedCITest,
alpha=alpha, labels = paste0(colnames(phen.df), "_", type), verbose = TRUE,
skel.method="stable", solve.confl=TRUE, maj.rule = TRUE, u2pd="relaxed")
}
} else {
if (suffStat$maj.rule == FALSE) {
if (debug) {
cat("\nRunning conservative RFCI algorithm...",
file=logFile, append=TRUE, sep="\n")
}
dag <- rfci(suffStat=suffStat, indepTest = familyBasedCITest,
alpha=alpha, labels = paste0(colnames(phen.df), "_", type), verbose = TRUE,
skel.method="stable", conservative=TRUE)
} else {
if (debug) {
cat("\nRunning RFCI algorithm with majority rule ...",
file=logFile, append=TRUE, sep="\n")
}
dag <- rfci(suffStat=suffStat, indepTest = familyBasedCITest,
alpha=alpha, labels = paste0(colnames(phen.df), "_", type), verbose = TRUE,
skel.method="stable", maj.rule = TRUE)
}
}
return(dag)
}
#' @importFrom methods as
printEdgeList <- function(dag) {
nodeLabels <- dag@graph@nodes
adjM <- as(dag, "amat")
cat("\nAdjacency Matrix:\n")
print(adjM)
cat("\nType 1 Edges:\n")
edgePairs <- which(adjM == 1, arr.ind=TRUE)
if (dim(edgePairs)[1] > 0) {
for (i in 1:dim(edgePairs)[1]) {
if(adjM[edgePairs[i,2],edgePairs[i,1]] == 1) {
# it is an undirected edge
if (edgePairs[i,2] < edgePairs[i,1])
cat(paste0("\n", nodeLabels[edgePairs[i,2]], " --- ", nodeLabels[edgePairs[i,1]]))
} else {
# it is a directed edge (column causes row)
cat(paste0("\n", nodeLabels[edgePairs[i,2]], " --> ", nodeLabels[edgePairs[i,1]]))
}
}
}
cat("\n\nType 2 Edges:\n")
edgePairs2 <- which(adjM == 2, arr.ind=TRUE)
if (dim(edgePairs2)[1] > 0) {
for (i in 1:dim(edgePairs2)[1]) {
if(adjM[edgePairs2[i,2],edgePairs2[i,1]] == 2) {
# it is an undirected edge
if (edgePairs2[i,2] < edgePairs2[i,1])
cat(paste0("\n", nodeLabels[edgePairs2[i,2]], " <--> ", nodeLabels[edgePairs2[i,1]]))
} else {
cat(paste0("\n", nodeLabels[edgePairs2[i,2]], " --> ", nodeLabels[edgePairs2[i,1]]))
}
}
}
}
#TODO
library(igraph) # as.undirected
library(corpcor)# - pseudoinverse
##########################################
# Determining Partial Correlation Matrix #
##########################################
mypmin <- function(vec1, vec2) {
pminvec <- c()
for (i in 1:length(vec1)) {
if (abs(vec1[i]) < abs(vec2[i]))
pminvec <- c(pminvec, vec1[i])
else
pminvec <- c(pminvec, vec2[i])
}
return(pminvec)
}
mypmax <- function(vec1, vec2) {
pmaxvec <- c()
for (i in 1:length(vec1)) {
if (abs(vec1[i]) > abs(vec2[i]))
pmaxvec <- c(pmaxvec, vec1[i])
else
pmaxvec <- c(pmaxvec, vec2[i])
}
return(pmaxvec)
}
# rule can be "max" or "min"
symmetrizeMatrix <- function(amatrix, rule="max") {
lowind <- which(lower.tri(amatrix), arr.ind=TRUE)
uppind <- which(upper.tri(amatrix), arr.ind=TRUE)
uppind <- uppind[order(uppind[,1]),]
offdiagind <- rbind(lowind, uppind)
new.values <- amatrix[offdiagind]
if (rule == "max") {
new.values <- mypmax((new.values[1:(length(new.values)/2)]), (new.values[(length(new.values)/2+1):length(new.values)]))
} else {
new.values <- mypmin((new.values[1:(length(new.values)/2)]), (new.values[(length(new.values)/2+1):length(new.values)]))
}
amatrix[offdiagind] <- new.values
return(amatrix)
}
convertInvCovToPCor <- function(SigmaInv, symmetrize=FALSE) {
pcorMatrix <- -SigmaInv
diag(pcorMatrix) <- -diag(pcorMatrix)
pcorMatrix <- cov2cor(pcorMatrix)
if (symmetrize)
pcorMatrix <- symmetrizeMatrix(pcorMatrix)
return(pcorMatrix)
}
getPartialCorrelationMatrixFromInvCov <- function(SigmaInv, n, ncov=0, method="z", symmetrize=FALSE) {
pcorMatrix <- convertInvCovToPCor(SigmaInv, symmetrize)
# To test if a sample partial correlation vanishes,
# Fisher's z-transform of the partial correlation can be used.
# See https://en.wikipedia.org/wiki/Partial_correlation#As_conditional_independence_test
gp <- (dim(pcorMatrix)[1]-2) + ncov # number of conditioning variables
# TODO verificar outras estatisticas (copiado da funcao pcor da biblioteca ppcor)
if (method == "kendall") {
# Kendall MG, Stuart A. (1973) The Advanced Theory of Statistics,
# Volume 2 (3rd Edition), ISBN 0-85264-215-6, Section 27.22
statistics <- pcorMatrix/sqrt(2 * (2 * (n - gp) + 5)/(9 * (n -
gp) * (n - 1 - gp)))
p.values <- 2 * pnorm(-abs(statistics))
}
else if (method == "z") {
# z statistic follows a Gaussian distribution
# with zero mean and unit standard deviation.
# The null hyphothesis of zero partial correlation is rejected if
# stat > qnorm(1 - alpha/2)
# The two-sided p-values of the tests are:
# Note that log1p(x) =: log(1+x), so
# log1p(2*r/(1-r)) = log(1 + 2*r/(1-r))
# = log((1-r+2r)/(1-r))
# = log(1+r) - log(1-r)
# = log((1+r)/(1-r))
if ((n - gp - 3) > 0) {
statistics <- sqrt(n - gp - 3)
} else {
statistics <- 0
}
statistics <- statistics * abs(0.5 * log1p(2*pcorMatrix/(1-pcorMatrix)))
p.values <- 2 * (1 - pnorm(abs(statistics))) # or 2 * pnorm(-abs(stat))
}
else { # shenskin
# David Shenskin - 2003 - page 1013 - eq. 28.101
# total number of variables employed in the analysis
# when there are two predictors and one criterion variable, nu = 3
nu <- dim(pcorMatrix)[1] + ncov # gp + 2
statistics <- pcorMatrix * sqrt((n - nu)/(1 - pcorMatrix^2))
p.values <- 2 * pt(-abs(statistics), (n - nu))
# statistics <- pcorMatrix * sqrt((n - 2 - gp)/(1 - pcorMatrix^2))
# p.values <- 2 * pt(-abs(statistics), (n - 2 - gp))
}
diag(p.values) <- NA
return(list(estimates=pcorMatrix, p.values=p.values, statistics=statistics))
}
getPartialCorrelationMatrix <- function(Sigma, n, ncov=0, method="z", symmetrize=FALSE) {
SigmaInv <- pseudoinverse(Sigma)
return(getPartialCorrelationMatrixFromInvCov(SigmaInv, n, ncov, method, symmetrize=symmetrize))
}
############################
# Getting Undirected Graph #
############################
# It applies the correction for multiple tests using all off-diagonal values
adjustPValuesMatrix <- function(pvaluesMatrix, correction="fdr") {
lowind <- which(lower.tri(pvaluesMatrix), arr.ind=TRUE)
uppind <- which(upper.tri(pvaluesMatrix), arr.ind=TRUE)
uppind <- uppind[order(uppind[,1]),]
offdiagind <- rbind(lowind, uppind)
adjp <- p.adjust(pvaluesMatrix[offdiagind], method=correction)
pvaluesMatrix[offdiagind] <- adjp
return(pvaluesMatrix)
}
getUndirectedGraphFromPCorPvalues <- function(pvaluesMatrix, alpha=0.05, correction="fdr", rule="and") {
nVertices <- dim(pvaluesMatrix)[1]
if (!is.null(correction)) {
pvaluesMatrix <- adjustPValuesMatrix(pvaluesMatrix, correction)
}
if (rule == "and") {
pvaluesMatrix <- symmetrizeMatrix(pvaluesMatrix, rule="max")
} else { #rule == or
pvaluesMatrix <- symmetrizeMatrix(pvaluesMatrix, rule="min")
}
edges <- which(pvaluesMatrix <= alpha, arr.ind=TRUE)
adjMatrix <- matrix(0, nVertices, nVertices)
adjMatrix[edges] <- 1
colnames(adjMatrix) <- colnames(pvaluesMatrix)
row.names(adjMatrix) <- colnames(pvaluesMatrix)
udg <- as.undirected(graph.adjacency(adjMatrix))
return(list(udg=udg, adj.pvalues=pvaluesMatrix))
}
getUndirectedGraphFromPCor <- function(pCor, alpha, correction="fdr") {
pvaluesMatrix <- pCor$p.values
diag(pvaluesMatrix) <- 1
return(getUndirectedGraphFromPCorPvalues(pvaluesMatrix, alpha=0.05, correction))
}
getUndirectedGraphFromCov <- function(Sigma, n, ncov=0, alpha=0.05, correction="fdr", method="z", symmetrize=FALSE) {
pCor <- getPartialCorrelationMatrix(Sigma, n, ncov, method, symmetrize)
ret <- getUndirectedGraphFromPCor(pCor, alpha, correction)
return(list(udg=ret$udg, adj.pvalues=ret$adj.pvalues, pCor=pCor))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.