Nothing
glmm.score <- function(obj, infile, outfile, BGEN.samplefile = NULL, center = T, select = NULL, MAF.range = c(1e-7, 0.5), miss.cutoff = 1, missing.method = "impute2mean", nperbatch = 100, tol = 1e-5, infile.nrow = NULL, infile.nrow.skip = 0, infile.sep = "\t", infile.na = "NA", infile.ncol.skip = 1, infile.ncol.print = 1, infile.header.print = "SNP", is.dosage = FALSE, ncores = 1, verbose = FALSE) {
is.Windows <- Sys.info()["sysname"] == "Windows"
if(!inherits(obj, c("glmmkin", "glmmkin.multi"))) stop("Error: obj must be a class glmmkin or glmmkin.multi object!")
n.pheno <- obj$n.pheno
if(any(duplicated(obj$id_include))) {
J <- sparseMatrix(i=1:length(obj$id_include), j=match(obj$id_include,unique(obj$id_include)), x=1)
res <- as.vector(as.matrix(crossprod(J, obj$scaled.residuals)))
if(!is.null(obj$P)) obj$P <- as.matrix(crossprod(J, crossprod(obj$P, J)))
else {
obj$Sigma_iX <- crossprod(J, obj$Sigma_iX)
obj$Sigma_i <- forceSymmetric(crossprod(J,crossprod(obj$Sigma_i,J)))
obj$Sigma_i <- Matrix(obj$Sigma_i, sparse = TRUE)
}
rm(J)
} else res <- obj$scaled.residuals
n <- length(unique(obj$id_include))
miss.method <- try(match.arg(missing.method, c("impute2mean", "omit")))
if(inherits(miss.method, "try-error")) stop("Error: missing.method should be one of the following: impute2mean, omit!")
miss.method <- substr(miss.method, 1, 1)
if(all(file.exists(paste(infile, c("bim", "bed", "fam"), sep=".")))) {
if(ncores != 1) stop("Error: parallel computing currently not implemented for PLINK binary format genotypes.")
if(inherits(obj, "glmmkin.multi")) stop("Error: multiple phenotypes currently not implemented for PLINK binary format genotypes.")
bimfile <- paste(infile, "bim", sep=".")
bedfile <- paste(infile, "bed", sep=".")
famfile <- paste(infile, "fam", sep=".")
sample.id <- fread(famfile, header=F, data.table=F)[,2]
if(is.null(select)) {
if(any(is.na(match(unique(obj$id_include), sample.id)))) warning("Check your data... Some id_include in obj are missing in sample.id of infile!")
select <- match(sample.id, unique(obj$id_include))
select[is.na(select)] <- 0
if(all(select == 0)) stop("Error: id_include in obj does not match sample.id in infile!")
}
if(length(select) != length(sample.id)) stop("Error: number of individuals in select does not match infile!")
select2 <- select[select > 0]
if(any(duplicated(select2)) || max(select2) > length(unique(obj$id_include))) stop("Error: select is a vector of orders, individuals not in obj should be coded 0!")
res <- res[select2]
if(!is.null(obj$P)) obj$P <- obj$P[select2, select2]
else {
obj$Sigma_iX <- Matrix(obj$Sigma_iX[select2, , drop = FALSE], sparse = TRUE)
obj$Sigma_i <- Matrix(obj$Sigma_i[select2, select2], sparse = TRUE)
obj$cov <- Matrix(obj$cov, sparse = TRUE)
}
select[select > 0] <- 1:sum(select > 0)
rm(select2)
if(center) {
if(!is.null(obj$P)) time <- .Call(C_glmm_score_bed, res, obj$P, bimfile, bedfile, outfile, 'c', MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select)
else time <- .Call(C_glmm_score_bed_sp, res, obj$Sigma_i, obj$Sigma_iX, obj$cov, bimfile, bedfile, outfile, 'c', MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select)
} else {
if(!is.null(obj$P)) time <- .Call(C_glmm_score_bed, res, obj$P, bimfile, bedfile, outfile, 'n', MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select)
else time <- .Call(C_glmm_score_bed_sp, res, obj$Sigma_i, obj$Sigma_iX, obj$cov, bimfile, bedfile, outfile, 'n', MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select)
}
#print(sprintf("Computational time: %.2f seconds", time))
return(invisible(time))
} else if(grepl("\\.gds$", infile[1])) { # GDS genotype file
if(is.Windows && ncores > 1) {
warning("The package doMC is not available on Windows... Switching to single thread...")
ncores <- 1
}
ncores <- min(c(ncores, parallel::detectCores(logical = TRUE)))
if (!inherits(infile, "SeqVarGDSClass")) {
gds <- SeqArray::seqOpen(infile)
} else {
gds <- infile
}
sample.id <- SeqArray::seqGetData(gds, "sample.id")
if(is.null(select)) {
if(any(is.na(match(unique(obj$id_include), sample.id)))) warning("Check your data... Some id_include in obj are missing in sample.id of infile!")
select <- match(sample.id, unique(obj$id_include))
select[is.na(select)] <- 0
if(all(select == 0)) stop("Error: id_include in obj does not match sample.id in infile!")
}
if(length(select) != length(sample.id)) stop("Error: number of individuals in select does not match infile!")
select2 <- select[select > 0]
if(any(duplicated(select2)) || max(select2) > length(unique(obj$id_include))) stop("Error: select is a vector of orders, individuals not in obj should be coded 0!")
if(inherits(obj, "glmmkin.multi")) {
res <- res[select2, , drop = FALSE]
select2 <- rep(select2, n.pheno) + rep((0:(n.pheno-1))*n, each = length(select2))
} else {
res <- res[select2]
}
if(!is.null(obj$P)) obj$P <- obj$P[select2, select2]
else {
obj$Sigma_iX <- obj$Sigma_iX[select2, , drop = FALSE]
obj$Sigma_i <- obj$Sigma_i[select2, select2]
}
rm(select2)
variant.idx.all <- SeqArray::seqGetData(gds, "variant.id")
if (!inherits(infile, "SeqVarGDSClass")) {
SeqArray::seqClose(gds)
}
p.all <- length(variant.idx.all)
if(ncores > 1) {
doMC::registerDoMC(cores = ncores)
p.percore <- (p.all-1) %/% ncores + 1
n.p.percore_1 <- p.percore * ncores - p.all
foreach(b = 1:ncores, .inorder=FALSE, .options.multicore = list(preschedule = FALSE, set.seed = FALSE)) %dopar% {
variant.idx <- if(b <= n.p.percore_1) variant.idx.all[((b-1)*(p.percore-1)+1):(b*(p.percore-1))] else variant.idx.all[(n.p.percore_1*(p.percore-1)+(b-n.p.percore_1-1)*p.percore+1):(n.p.percore_1*(p.percore-1)+(b-n.p.percore_1)*p.percore)]
p <- length(variant.idx)
if(verbose) {
if(b==1) cat("Progress of score test:\n")
pb <- txtProgressBar(min = 0, max = p, style = 3)
cat("\n")
}
if (!inherits(infile, "SeqVarGDSClass")) {
gds <- SeqArray::seqOpen(infile)
} else {
gds <- infile
}
SeqArray::seqSetFilter(gds, sample.id = sample.id[select > 0], verbose = FALSE)
rm(sample.id); rm(select)
nbatch.flush <- (p-1) %/% 100000 + 1
ii <- 0
for(i in 1:nbatch.flush) {
if(verbose) setTxtProgressBar(pb, (i-1)*100000)
gc()
tmp.variant.idx <- if(i == nbatch.flush) variant.idx[((i-1)*100000+1):p] else variant.idx[((i-1)*100000+1):(i*100000)]
SeqArray::seqSetFilter(gds, variant.id = tmp.variant.idx, verbose = FALSE)
MISSRATE <- if(is.dosage) SeqArray::seqApply(gds, "annotation/format/DS", function(xx) mean(is.na(xx)), margin = "by.variant", as.is = "double") else SeqVarTools::missingGenotypeRate(gds, margin = "by.variant")
AF <- if(is.dosage) SeqArray::seqApply(gds, "annotation/format/DS", mean, margin = "by.variant", as.is = "double", na.rm = TRUE)/2 else 1 - SeqVarTools::alleleFrequency(gds)
include <- (MISSRATE <= miss.cutoff & ((AF >= MAF.range[1] & AF <= MAF.range[2]) | (AF >= 1-MAF.range[2] & AF <= 1-MAF.range[1])))
if(sum(include) == 0) next
ii <- ii + 1
tmp.variant.idx <- tmp.variant.idx[include]
tmp.p <- length(tmp.variant.idx)
SeqArray::seqSetFilter(gds, variant.id = tmp.variant.idx, verbose = FALSE)
SNP <- SeqArray::seqGetData(gds, "annotation/id")
SNP[SNP == ""] <- NA
out <- data.frame(SNP = SNP, CHR = SeqArray::seqGetData(gds, "chromosome"), POS = SeqArray::seqGetData(gds, "position"))
rm(SNP)
alleles.list <- strsplit(SeqArray::seqGetData(gds, "allele"), ",")
out$REF <- unlist(lapply(alleles.list, function(x) x[1]))
out$ALT <- unlist(lapply(alleles.list, function(x) paste(x[-1], collapse=",")))
out$MISSRATE <- MISSRATE[include]
out$AF <- AF[include]
rm(alleles.list, include)
tmp.out <- lapply(1:((tmp.p-1) %/% nperbatch + 1), function(j) {
tmp2.variant.idx <- if(j == (tmp.p-1) %/% nperbatch + 1) tmp.variant.idx[((j-1)*nperbatch+1):tmp.p] else tmp.variant.idx[((j-1)*nperbatch+1):(j*nperbatch)]
SeqArray::seqSetFilter(gds, variant.id = tmp2.variant.idx, verbose = FALSE)
geno <- if(is.dosage) SeqVarTools::imputedDosage(gds, use.names = FALSE) else SeqVarTools::altDosage(gds, use.names = FALSE)
N <- nrow(geno) - colSums(is.na(geno))
if(center) geno <- scale(geno, scale = FALSE)
miss.idx <- which(is.na(geno))
if(length(miss.idx)>0) {
geno[miss.idx] <- if(!center & missing.method == "impute2mean") colMeans(geno, na.rm = TRUE)[ceiling(miss.idx/nrow(geno))] else 0
}
if(inherits(obj, "glmmkin.multi")) {
SCORE <- crossprod(res, geno)
geno <- Diagonal(n = n.pheno) %x% geno
} else {
SCORE <- as.vector(crossprod(geno, res))
}
if(!is.null(obj$P)) VAR <- crossprod(geno, crossprod(obj$P, geno))
else {
GSigma_iX <- crossprod(geno, obj$Sigma_iX)
VAR <- crossprod(geno, crossprod(obj$Sigma_i, geno)) - tcrossprod(GSigma_iX, tcrossprod(GSigma_iX, obj$cov))
}
if(inherits(obj, "glmmkin.multi")) {
VAR_PVAL <- sapply(1:length(N), function(xx) {
VAR2 <- as.matrix(VAR[(0:(n.pheno-1))*length(N)+xx,(0:(n.pheno-1))*length(N)+xx])
PVAL <- try(pchisq(crossprod(SCORE[,xx], solve(VAR2, SCORE[,xx])), df = n.pheno, lower.tail = FALSE))
if(inherits(PVAL, "try-error")) PVAL <- NA
c(VAR2[lower.tri(VAR2, diag = TRUE)], PVAL)
})
return(rbind(N, SCORE, VAR_PVAL))
} else {
VAR <- diag(VAR)
PVAL <- ifelse(VAR>0, pchisq(SCORE^2/VAR, df=1, lower.tail=FALSE), NA)
return(rbind(N, SCORE, VAR, PVAL))
}
})
if(inherits(obj, "glmmkin.multi")) {
tmp.out <- matrix(unlist(tmp.out), ncol = 2+n.pheno+n.pheno*(n.pheno+1)/2, byrow = TRUE, dimnames = list(NULL, c("N", paste0("SCORE",1:n.pheno), paste0("VAR",rep(1:n.pheno,n.pheno:1),unlist(lapply(1:n.pheno,function(xx) xx:n.pheno))), "PVAL")))
out <- cbind(out[,c("SNP","CHR","POS","REF","ALT")], tmp.out[,"N"], out[,c("MISSRATE","AF")], tmp.out[,c(paste0("SCORE",1:n.pheno), paste0("VAR",rep(1:n.pheno,n.pheno:1),unlist(lapply(1:n.pheno,function(xx) xx:n.pheno))), "PVAL")])
} else {
tmp.out <- matrix(unlist(tmp.out), ncol = 4, byrow = TRUE, dimnames = list(NULL, c("N", "SCORE", "VAR", "PVAL")))
out <- cbind(out[,c("SNP","CHR","POS","REF","ALT")], tmp.out[,"N"], out[,c("MISSRATE","AF")], tmp.out[,c("SCORE","VAR","PVAL")])
}
names(out)[6] <- "N"
rm(tmp.out)
if(b == 1) {
write.table(out, outfile, quote=FALSE, row.names=FALSE, col.names=(ii == 1), sep="\t", append=(ii > 1), na="NA")
} else {
write.table(out, paste0(outfile, "_tmp.", b), quote=FALSE, row.names=FALSE, col.names=FALSE, sep="\t", append=(ii > 1), na="NA")
}
rm(out)
}
SeqArray::seqClose(gds)
if(verbose) {
setTxtProgressBar(pb, p)
close(pb)
}
}
if (inherits(infile, "SeqVarGDSClass")) {
SeqArray::seqClose(infile)
}
for(b in 2:ncores) {
system(paste0("cat ", outfile, "_tmp.", b, " >> ", outfile))
unlink(paste0(outfile, "_tmp.", b))
}
} else {
variant.idx <- variant.idx.all
rm(variant.idx.all)
p <- length(variant.idx)
if(verbose) {
if(is.Windows) pb <- winProgressBar(min = 0, max = p)
else {
cat("Progress of score test:\n")
pb <- txtProgressBar(min = 0, max = p, style = 3)
cat("\n")
}
}
if (!inherits(infile, "SeqVarGDSClass")) {
gds <- SeqArray::seqOpen(infile)
}
SeqArray::seqSetFilter(gds, sample.id = sample.id[select > 0], verbose = FALSE)
rm(sample.id); rm(select)
nbatch.flush <- (p-1) %/% 100000 + 1
ii <- 0
for(i in 1:nbatch.flush) {
if(verbose) {
if(is.Windows) setWinProgressBar(pb, (i-1)*100000, title=paste0("Progress of score test: ",round((i-1)*100000/p*100),"%"))
else setTxtProgressBar(pb, (i-1)*100000)
}
gc()
tmp.variant.idx <- if(i == nbatch.flush) variant.idx[((i-1)*100000+1):p] else variant.idx[((i-1)*100000+1):(i*100000)]
tmp.p <- length(tmp.variant.idx)
SeqArray::seqSetFilter(gds, variant.id = tmp.variant.idx, verbose = FALSE)
MISSRATE <- if(is.dosage) SeqArray::seqApply(gds, "annotation/format/DS", function(xx) mean(is.na(xx)), margin = "by.variant", as.is = "double") else SeqVarTools::missingGenotypeRate(gds, margin = "by.variant")
AF <- if(is.dosage) SeqArray::seqApply(gds, "annotation/format/DS", mean, margin = "by.variant", as.is = "double", na.rm = TRUE)/2 else 1 - SeqVarTools::alleleFrequency(gds)
include <- (MISSRATE <= miss.cutoff & ((AF >= MAF.range[1] & AF <= MAF.range[2]) | (AF >= 1-MAF.range[2] & AF <= 1-MAF.range[1])))
if(sum(include) == 0) next
ii <- ii + 1
tmp.variant.idx <- tmp.variant.idx[include]
tmp.p <- length(tmp.variant.idx)
SeqArray::seqSetFilter(gds, variant.id = tmp.variant.idx, verbose = FALSE)
SNP <- SeqArray::seqGetData(gds, "annotation/id")
SNP[SNP == ""] <- NA
out <- data.frame(SNP = SNP, CHR = SeqArray::seqGetData(gds, "chromosome"), POS = SeqArray::seqGetData(gds, "position"))
rm(SNP)
alleles.list <- strsplit(SeqArray::seqGetData(gds, "allele"), ",")
out$REF <- unlist(lapply(alleles.list, function(x) x[1]))
out$ALT <- unlist(lapply(alleles.list, function(x) paste(x[-1], collapse=",")))
out$MISSRATE <- MISSRATE[include]
out$AF <- AF[include]
rm(alleles.list, include)
tmp.out <- lapply(1:((tmp.p-1) %/% nperbatch + 1), function(j) {
tmp2.variant.idx <- if(j == (tmp.p-1) %/% nperbatch + 1) tmp.variant.idx[((j-1)*nperbatch+1):tmp.p] else tmp.variant.idx[((j-1)*nperbatch+1):(j*nperbatch)]
SeqArray::seqSetFilter(gds, variant.id = tmp2.variant.idx, verbose = FALSE)
geno <- if(is.dosage) SeqVarTools::imputedDosage(gds, use.names = FALSE) else SeqVarTools::altDosage(gds, use.names = FALSE)
N <- nrow(geno) - colSums(is.na(geno))
if(center) geno <- scale(geno, scale = FALSE)
miss.idx <- which(is.na(geno))
if(length(miss.idx)>0) {
geno[miss.idx] <- if(!center & missing.method == "impute2mean") colMeans(geno, na.rm = TRUE)[ceiling(miss.idx/nrow(geno))] else 0
}
if(inherits(obj, "glmmkin.multi")) {
SCORE <- crossprod(res, geno)
geno <- Diagonal(n = n.pheno) %x% geno
} else {
SCORE <- as.vector(crossprod(geno, res))
}
if(!is.null(obj$P)) VAR <- crossprod(geno, crossprod(obj$P, geno))
else {
GSigma_iX <- crossprod(geno, obj$Sigma_iX)
VAR <- crossprod(geno, crossprod(obj$Sigma_i, geno)) - tcrossprod(GSigma_iX, tcrossprod(GSigma_iX, obj$cov))
}
if(inherits(obj, "glmmkin.multi")) {
VAR_PVAL <- sapply(1:length(N), function(xx) {
VAR2 <- as.matrix(VAR[(0:(n.pheno-1))*length(N)+xx,(0:(n.pheno-1))*length(N)+xx])
PVAL <- try(pchisq(crossprod(SCORE[,xx], solve(VAR2, SCORE[,xx])), df = n.pheno, lower.tail = FALSE))
if(inherits(PVAL, "try-error")) PVAL <- NA
c(VAR2[lower.tri(VAR2, diag = TRUE)], PVAL)
})
return(rbind(N, SCORE, VAR_PVAL))
} else {
VAR <- diag(VAR)
PVAL <- ifelse(VAR>0, pchisq(SCORE^2/VAR, df=1, lower.tail=FALSE), NA)
return(rbind(N, SCORE, VAR, PVAL))
}
})
if(inherits(obj, "glmmkin.multi")) {
tmp.out <- matrix(unlist(tmp.out), ncol = 2+n.pheno+n.pheno*(n.pheno+1)/2, byrow = TRUE, dimnames = list(NULL, c("N", paste0("SCORE",1:n.pheno), paste0("VAR",rep(1:n.pheno,n.pheno:1),unlist(lapply(1:n.pheno,function(xx) xx:n.pheno))), "PVAL")))
out <- cbind(out[,c("SNP","CHR","POS","REF","ALT")], tmp.out[,"N"], out[,c("MISSRATE","AF")], tmp.out[,c(paste0("SCORE",1:n.pheno), paste0("VAR",rep(1:n.pheno,n.pheno:1),unlist(lapply(1:n.pheno,function(xx) xx:n.pheno))), "PVAL")])
} else {
tmp.out <- matrix(unlist(tmp.out), ncol = 4, byrow = TRUE, dimnames = list(NULL, c("N", "SCORE", "VAR", "PVAL")))
out <- cbind(out[,c("SNP","CHR","POS","REF","ALT")], tmp.out[,"N"], out[,c("MISSRATE","AF")], tmp.out[,c("SCORE","VAR","PVAL")])
}
names(out)[6] <- "N"
rm(tmp.out)
write.table(out, outfile, quote=FALSE, row.names=FALSE, col.names=(ii == 1), sep="\t", append=(ii > 1), na="NA")
rm(out)
}
SeqArray::seqClose(gds)
if(verbose) {
if(is.Windows) setWinProgressBar(pb, p, title="Progress of score test: 100%")
else setTxtProgressBar(pb, p)
close(pb)
}
}
return(invisible(NULL))
} else if (grepl("\\.bgen$", infile)){
if(inherits(obj, "glmmkin.multi")) stop("Error: multiple phenotypes currently not implemented for BGEN format genotypes.")
if(is.Windows && ncores > 1) {
warning("The package doMC is not available on Windows... Switching to single thread...", immediate. = TRUE)
ncores <- 1
}
ncores <- min(c(ncores, parallel::detectCores(logical = TRUE)))
bgenInfo <- .Call(C_bgenHeader, infile)
if (is.null(bgenInfo)) {
opt <- options(show.error.messages = FALSE)
on.exit(options(opt))
stop()
}
if(!is.null(select)){
if(length(select) != bgenInfo$N) stop("Error: number of individuals in select does not match infile!")
} else if (!is.null(BGEN.samplefile)) {
sampleFile <- fread(file = BGEN.samplefile, header = TRUE, data.table=F)
if ((nrow(sampleFile)-1) != bgenInfo$N){
stop(paste0("Error: Number of sample identifiers in sample file (", nrow(sampleFile)-1, ") does not match number of samples in BGEN file (", bgenInfo$N,")."))
}
sample.id <- sampleFile[-1,2]
if(any(is.na(match(unique(obj$id_include), sample.id)))) warning("Check your data... Some id_include in obj are missing in BGEN.samplefile!", immediate. = TRUE)
select <- match(sample.id, unique(obj$id_include))
select[is.na(select)] <- 0
if(all(select == 0)) stop("Error: id_include in obj does not match sample.id in BGEN.samplefile!")
} else {
if (is.null(BGEN.samplefile) && bgenInfo$SampleIdFlag == 0) {
stop("Error: BGEN file does not contain sample identifiers. Use the BGEN.samplefile or select parameter.")
}
if(any(is.na(match(unique(obj$id_include), bgenInfo$SampleIds)))) warning("Check your data... Some id_include in obj are missing in sample.id of infile!", immediate. = TRUE)
select <- match(bgenInfo$SampleIds, unique(obj$id_include))
select[is.na(select)] <- 0
if(all(select == 0)) stop("Error: id_include in obj does not match sample.id in infile!")
}
select2 <- select[select > 0]
if(any(duplicated(select2)) || max(select2) > length(unique(obj$id_include))) stop("Error: select is a vector of orders, individuals not in obj should be coded 0!")
res <- res[select2]
if(!is.null(obj$P)) obj$P <- obj$P[select2, select2]
else {
obj$Sigma_iX <- Matrix(obj$Sigma_iX[select2, , drop = FALSE], sparse = TRUE)
obj$Sigma_i <- Matrix(obj$Sigma_i[select2, select2], sparse = TRUE)
obj$cov <- Matrix(obj$cov, sparse = TRUE)
}
select[select > 0] <- 1:sum(select > 0)
rm(select2)
center2 <- ifelse(center, 'c', 'n')
if (ncores > 1){
if (ncores > bgenInfo$M) {
ncores = bgenInfo$M
warning(paste0("The number of threads specified is greater than the number of variants. Using ", ncores, " cores instead."), immediate. = TRUE)
}
threadInfo <- .Call(C_getVariantPos, infile, bgenInfo$offset, bgenInfo$M, bgenInfo$N, bgenInfo$CompressionFlag, bgenInfo$LayoutFlag, ncores)
doMC::registerDoMC(cores = ncores)
foreach(i=1:ncores) %dopar% {
if (!is.null(obj$P)) {
if (bgenInfo$LayoutFlag == 2) {
.Call(C_glmm_score_bgen13, as.numeric(res), obj$P, infile, paste0(outfile, "_tmp.", i), center2, MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select, threadInfo$begin[i], threadInfo$end[i], threadInfo$pos[i], bgenInfo$N, bgenInfo$CompressionFlag, 1)
} else {
.Call(C_glmm_score_bgen11, as.numeric(res), obj$P, infile, paste0(outfile, "_tmp.", i), center2, MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select, threadInfo$begin[i], threadInfo$end[i], threadInfo$pos[i], bgenInfo$N, bgenInfo$CompressionFlag, 1)
}
} else {
if (bgenInfo$LayoutFlag == 2) {
.Call(C_glmm_score_bgen13_sp, as.numeric(res), obj$Sigma_i, obj$Sigma_iX, obj$cov, infile, paste0(outfile, "_tmp.", i), center2, MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select, threadInfo$begin[i], threadInfo$end[i], threadInfo$pos[i], bgenInfo$N, bgenInfo$CompressionFlag, 1)
} else {
.Call(C_glmm_score_bgen11_sp, as.numeric(res), obj$Sigma_i, obj$Sigma_iX, obj$cov, infile, paste0(outfile, "_tmp.", i), center2, MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select, threadInfo$begin[i], threadInfo$end[i], threadInfo$pos[i], bgenInfo$N, bgenInfo$CompressionFlag, 1)
}
}
}
outTmp <- file(outfile, "w")
writeLines("SNP\tRSID\tCHR\tPOS\tA1\tA2\tN\tAF\tSCORE\tVAR\tPVAL", outTmp)
for(i in 1:ncores){
inTmp <- readLines(paste0(outfile, "_tmp.", i))
writeLines(inTmp, outTmp)
unlink(paste0(outfile, "_tmp.", i))
}
close(outTmp)
} else {
if(!is.null(obj$P)) {
if (bgenInfo$LayoutFlag == 2) {
.Call(C_glmm_score_bgen13, as.numeric(res), obj$P, infile, outfile, center2, MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select, 0, bgenInfo$M, bgenInfo$offset + 4, bgenInfo$N, bgenInfo$CompressionFlag, 0)
} else{
.Call(C_glmm_score_bgen11, as.numeric(res), obj$P, infile, outfile, center2, MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select, 0, bgenInfo$M, bgenInfo$offset + 4, bgenInfo$N, bgenInfo$CompressionFlag, 0)
}
} else {
if (bgenInfo$LayoutFlag == 2) {
.Call(C_glmm_score_bgen13_sp, as.numeric(res), obj$Sigma_i, obj$Sigma_iX, obj$cov, infile, outfile, center2, MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select, 0, bgenInfo$M, bgenInfo$offset + 4, bgenInfo$N, bgenInfo$CompressionFlag, 0)
} else{
.Call(C_glmm_score_bgen11_sp, as.numeric(res), obj$Sigma_i, obj$Sigma_iX, obj$cov, infile, outfile, center2, MAF.range[1], MAF.range[2], miss.cutoff, miss.method, nperbatch, select, 0, bgenInfo$M, bgenInfo$offset + 4, bgenInfo$N, bgenInfo$CompressionFlag, 0)
}
}
}
return(invisible(NULL))
} else { # text genotype files
if(ncores != 1) stop("Error: parallel computing currently not implemented for plain text format genotypes.")
if(inherits(obj, "glmmkin.multi")) stop("Error: multiple phenotypes currently not implemented for plain text format genotypes.")
if(is.null(infile.nrow)) {
if(Sys.info()["sysname"] != "Linux") {
infile.nrow <- length(readLines(infile))
} else {
if(grepl("\\.gz$", infile)) infile.nrow <- suppressWarnings(as.integer(system(paste("gzip -dc", infile, "| wc -l | gawk '{print $1}'"), intern = T)))
else if(grepl("\\.bz2$", infile)) infile.nrow <- suppressWarnings(as.integer(system(paste("bzip2 -dc", infile, "| wc -l | gawk '{print $1}'"), intern = T)))
else infile.nrow <- suppressWarnings(as.integer(system(paste("wc -l", infile, "| gawk '{print $1}'"), intern = T)))
}
if(any(is.na(infile.nrow))) infile.nrow <- length(readLines(infile))
}
if(!is.numeric(infile.nrow) | infile.nrow < 0)
stop("Error: number of rows of the input file is incorrect!")
if(!is.numeric(infile.nrow.skip) | infile.nrow.skip < 0)
stop("Error: number of skipped rows of the input file is incorrect!")
if(!is.numeric(infile.ncol.skip) | infile.ncol.skip < 0)
stop("Error: number of skipped cols of the input file is incorrect!")
if(length(infile.ncol.print) != length(infile.header.print))
stop("Error: number of cols selected to print does not match number of header names!")
if(is.null(infile.ncol.print))
infile.ncol.print <- 0
if(is.null(infile.header.print))
infile.header.print <- infile.na
if(any(!is.numeric(infile.ncol.print)) | any(infile.ncol.print < 0) | any(infile.ncol.print > infile.ncol.skip))
stop("Error: cols selected to print have incorrect indices!")
if(any(infile.ncol.print != sort(infile.ncol.print)))
stop("Error: col indices must be sorted increasingly in infile.ncol.print!")
if(is.null(select)) {
warning("Argument select is unspecified... Assuming the order of individuals in infile matches unique id_include in obj...")
select <- 1:length(unique(obj$id_include))
}
select2 <- select[select > 0]
if(any(duplicated(select2)) || max(select2) > length(unique(obj$id_include))) stop("Error: select is a vector of orders, individuals not in obj should be coded 0!")
res <- res[select2]
if(!is.null(obj$P)) obj$P <- obj$P[select2, select2]
else {
obj$Sigma_iX <- Matrix(obj$Sigma_iX[select2, , drop = FALSE], sparse = TRUE)
obj$Sigma_i <- Matrix(obj$Sigma_i[select2, select2], sparse = TRUE)
obj$cov <- Matrix(obj$cov, sparse = TRUE)
}
select[select > 0] <- 1:sum(select > 0)
rm(select2)
if(center) {
if(!is.null(obj$P)) time <- .Call(C_glmm_score_text, res, obj$P, infile, outfile, tol, 'c', MAF.range[1], MAF.range[2], miss.cutoff, miss.method, infile.nrow, infile.nrow.skip, infile.sep, infile.na, infile.ncol.skip, infile.ncol.print, infile.header.print, nperbatch, select)
else time <- .Call(C_glmm_score_text_sp, res, obj$Sigma_i, obj$Sigma_iX, obj$cov, infile, outfile, tol, 'c', MAF.range[1], MAF.range[2], miss.cutoff, miss.method, infile.nrow, infile.nrow.skip, infile.sep, infile.na, infile.ncol.skip, infile.ncol.print, infile.header.print, nperbatch, select)
} else {
if(!is.null(obj$P)) time <- .Call(C_glmm_score_text, res, obj$P, infile, outfile, tol, 'n', MAF.range[1], MAF.range[2], miss.cutoff, miss.method, infile.nrow, infile.nrow.skip, infile.sep, infile.na, infile.ncol.skip, infile.ncol.print, infile.header.print, nperbatch, select)
else time <- .Call(C_glmm_score_text_sp, res, obj$Sigma_i, obj$Sigma_iX, obj$cov, infile, outfile, tol, 'n', MAF.range[1], MAF.range[2], miss.cutoff, miss.method, infile.nrow, infile.nrow.skip, infile.sep, infile.na, infile.ncol.skip, infile.ncol.print, infile.header.print, nperbatch, select)
}
#print(sprintf("Computational time: %.2f seconds", time))
return(invisible(time))
}
}
glmm.score.meta <- function(files, outfile, SNP = rep("SNP", length(files)), A1 = rep("A1", length(files)), A2 = rep("A2", length(files))) {
k <- length(files)
if(length(SNP) != k) stop("Error: \"SNP\" must have the same length as \"files\"!")
if(length(A1) != k) stop("Error: \"A1\" must have the same length as \"files\"!")
if(length(A2) != k) stop("Error: \"A2\" must have the same length as \"files\"!")
master <- fread(files[1], header=T, data.table=F)[, c(SNP[1], A1[1], A2[1], "N", "AF", "SCORE", "VAR", "PVAL")]
names(master)[1:3] <- c("SNP", "A1", "A2")
master <- master[!is.na(master$SCORE) & !is.na(master$VAR) & !is.na(master$PVAL), ]
flag <- rep(0, nrow(master))
if(k > 1) {
for(i in 2:k) {
tmp <- fread(files[i], header=T, data.table=F)[, c(SNP[i], A1[i], A2[i], "N", "AF", "SCORE", "VAR", "PVAL")]
names(tmp)[1:3] <- c("SNP", "A1", "A2")
tmp <- tmp[!is.na(tmp$SCORE) & !is.na(tmp$VAR) & !is.na(tmp$PVAL), ]
idx <- tmp$SNP %in% master$SNP
if(sum(!idx) > 0) {
flag <- c(flag, rep(0, sum(!idx)))
master <- rbind(master, tmp[!idx, ])
}
idx2 <- match(tmp$SNP[idx], master$SNP)
noflip <- master$A1[idx2] == tmp$A1[idx] & master$A2[idx2] == tmp$A2[idx]
flip <- master$A1[idx2] == tmp$A2[idx] & master$A2[idx2] == tmp$A1[idx]
flag[idx2] <- flag[idx2] + as.numeric(!noflip & !flip)
master$AF[idx2][noflip] <- (master$AF[idx2][noflip]*master$N[idx2][noflip] + tmp$AF[idx][noflip]*tmp$N[idx][noflip])/(master$N[idx2][noflip] + tmp$N[idx][noflip])
master$AF[idx2][flip] <- (master$AF[idx2][flip]*master$N[idx2][flip] + (1 - tmp$AF[idx][flip])*tmp$N[idx][flip])/(master$N[idx2][flip] + tmp$N[idx][flip])
master$N[idx2] <- master$N[idx2] + tmp$N[idx]
master$SCORE[idx2][noflip] <- master$SCORE[idx2][noflip] + tmp$SCORE[idx][noflip]
master$SCORE[idx2][flip] <- master$SCORE[idx2][flip] - tmp$SCORE[idx][flip]
master$VAR[idx2] <- master$VAR[idx2] + tmp$VAR[idx]
}
if(any(flag > 0)) {
cat("The following SNPs have been removed due to inconsistent alleles across studies:\n")
print(master$SNP[flag > 0])
master <- subset(master, flag == 0)
}
master$PVAL <- pchisq(master$SCORE^2/master$VAR, 1, lower.tail=F)
}
write.table(master, outfile, sep="\t", row.names=F, col.names=T, quote=F)
invisible(master)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.