R/HIBAG.r
In HIBAG: HIBAG -- HLA Genotype Imputation with Attribute Bagging

#######################################################################
#
# Package Name: HIBAG v1.2.5
#
# Description:
#   HIBAG -- HLA Genotype Imputation with Attribute Bagging
#
# Author: Xiuwen Zheng
# License: GPL-3
# Email: zhengx@u.washington.edu
#


#######################################################################
#
# the internal functions
#
#######################################################################

#######################################################################
# get the name of HLA gene
#

.hla_gene_name_string <- function(geno.name)
{
	stopifnot(is.character(geno.name))
	i <- grep(paste("\\b",c(
			# HLA Classic I genes
			"A", "B", "C", "E", "F", "G",
			# HLA Classic II genes
			"DMA", "DMB", "DOA", "DOB",
			"DRA", "DRB\\d", "DQA\\d", "DQB\\d", "DPA\\d", "DPB\\d",
			# HLA Classic III genes
			"LTA", "TNF", "LTB", "HSPA1L", "HSPA1A", "HSPA1B",
			"C2", "BF", "C4A", "C4B"),
		"\\b", sep="", collapse="|"),
		geno.name)
	geno.name[i] <- paste("HLA -", geno.name[i])
	geno.name
}


#######################################################################
# call function in parallel
#

.DynamicClusterCall <- function(cl, fun, combine.fun, msg.fn, n,
	stop.cluster, ...)
{
	# in order to use the internal functions accessed by ':::'
	# the functions are all defined in 'parallel/R/snow.R'

	.SendData <- parse(text="parallel:::sendData(con, list(type=type,data=value,tag=tag))")
	.RecvOneData <- parse(text="parallel:::recvOneData(cl)")

	postNode <- function(con, type, value = NULL, tag = NULL)
	{
		eval(.SendData)
	}
	sendCall <- function(con, fun, args, return = TRUE, tag = NULL)
	{
		postNode(con, "EXEC",
			list(fun = fun, args = args, return = return, tag = tag))
		NULL
	}
	recvOneResult <- function(cl)
	{
		v <- eval(.RecvOneData)
		list(value = v$value$value, node = v$node, tag = v$value$tag)
	}


	#################################################################
	# check
	stopifnot(is.null(cl) | inherits(cl, "cluster"))
	stopifnot(is.function(fun))
	stopifnot(is.function(combine.fun))
	stopifnot(is.function(msg.fn))
	stopifnot(is.numeric(n))
	stopifnot(is.logical(stop.cluster))

	val <- NULL
	if (!is.null(cl))
	{
		p <- length(cl)
		if (n > 0L && p)
		{
			## **** this closure is sending to all nodes
			argfun <- function(i) c(i, list(...))

			submit <- function(node, job)
				sendCall(cl[[node]], fun, argfun(job), tag = job)

			for (i in 1:min(n, p)) submit(i, i)
			for (i in 1:n)
			{
				d <- recvOneResult(cl)
				j <- i + min(n, p)

				stopflag <- FALSE
				if (j <= n)
				{
					submit(d$node, j)
				} else {
					if (stop.cluster)
					{
						parallel::stopCluster(cl[d$node])
						cl <- cl[-d$node]
						stopflag <- TRUE
					}
				}

				dv <- d$value
				if (inherits(dv, "try-error"))
				{
					if (stop.cluster)
						parallel::stopCluster(cl)
					stop("One node produced an error: ", as.character(dv))
				}

				msg.fn(d$node, dv)
				if (stopflag)
					message(sprintf("Stop \"job %d\".", d$node))

				val <- combine.fun(val, dv)
			}
		}
	} else {
		for (i in 1:n)
		{
			dv <- fun(i, ...)
			msg.fn(i, dv)
			val <- combine.fun(val, dv)
		}
	}

	val
}



#######################################################################
#
# the functions for SNP genotypes and haplotypes
#
#######################################################################

#
#
# hlaSNPGenoClass is a class of SNP genotypes
# list:
#     genotype -- a genotype matrix, ``# of SNPs'' X ``# of samples''
#     sample.id -- sample id
#     snp.id -- snp id
#     snp.position -- snp positions in basepair
#     snp.allele -- snp alleles, ``A allele/B allele''
#     assembly -- the human genome reference, such like "hg19"
#
#
# hlaSNPHaploClass is a class of SNP haplotypes
# list:
#     haplotype -- a haplotype matrix, ``# of SNPs'' X ``2 x # of samples''
#     sample.id -- sample id
#     snp.id -- snp id
#     snp.position -- snp positions in basepair
#     snp.allele -- snp alleles, ``A allele/B allele''
#     assembly -- the human genome reference, such like "hg19"
#
#


#######################################################################
# To create a "hlaSNPGenoClass" object (SNP genotype object)
#

hlaMakeSNPGeno <- function(genotype, sample.id, snp.id, snp.position,
	A.allele, B.allele, assembly=c("auto", "hg18", "hg19", "unknown"))
{
	# check
	stopifnot(is.matrix(genotype))
	stopifnot(length(snp.id) == nrow(genotype))
	stopifnot(length(sample.id) == ncol(genotype))
	stopifnot(length(snp.id) == length(snp.position))
	stopifnot(length(snp.id) == length(A.allele))
	stopifnot(length(snp.id) == length(B.allele))
	stopifnot(is.character(A.allele))
	stopifnot(is.character(B.allele))

	assembly <- match.arg(assembly)
	if (assembly == "auto")
	{
		message("using the default genome assembly (assembly=\"hg19\")")
		assembly <- "hg19"
	}

	rv <- list(genotype = genotype, sample.id = sample.id, snp.id = snp.id,
		snp.position = snp.position,
		snp.allele = paste(A.allele, B.allele, sep="/"),
		assembly = assembly)
	class(rv) <- "hlaSNPGenoClass"

	# valid snp.id
	flag <- is.na(rv$snp.id)
	if (any(flag))
	{
		warning(sprintf(
			"There are %d SNPs with missing SNP id, and they have been removed.",
			sum(flag)))
		rv <- hlaGenoSubset(rv, snp.sel=!flag)
	}
	# valid snp.position
	flag <- is.na(rv$snp.position)
	if (any(flag))
	{
		warning(sprintf(
			"There are %d SNPs with missing SNP positions, and they have been removed.",
			sum(flag)))
		rv <- hlaGenoSubset(rv, snp.sel=!flag)
	}

	return(rv)
}


#######################################################################
# To create a "hlaSNPGenoClass" object (SNP genotype object)
#

hlaMakeSNPHaplo <- function(haplotype, sample.id, snp.id, snp.position,
	A.allele, B.allele, assembly=c("auto", "hg18", "hg19", "unknown"))
{
	# check
	stopifnot(is.matrix(haplotype))
	stopifnot(length(snp.id) == nrow(haplotype))
	stopifnot(2*length(sample.id) == ncol(haplotype))
	stopifnot(length(snp.id) == length(snp.position))
	stopifnot(length(snp.id) == length(A.allele))
	stopifnot(length(snp.id) == length(B.allele))
	stopifnot(is.character(A.allele))
	stopifnot(is.character(B.allele))

	assembly <- match.arg(assembly)
	if (assembly == "auto")
	{
		message("using the default genome assembly (assembly=\"hg19\")")
		assembly <- "hg19"
	}

	rv <- list(haplotype = haplotype, sample.id = sample.id, snp.id = snp.id,
		snp.position = snp.position,
		snp.allele = paste(A.allele, B.allele, sep="/"),
		assembly = assembly)
	class(rv) <- "hlaSNPHaploClass"

	# valid snp.id
	flag <- is.na(rv$snp.id)
	if (any(flag))
	{
		warning(sprintf(
			"There are %d SNPs with missing SNP id, and they have been removed.",
			sum(flag)))
		rv <- hlaHaploSubset(rv, snp.sel=!flag)
	}
	# valid snp.position
	flag <- is.na(rv$snp.position)
	if (any(flag))
	{
		warning(sprintf(
			"There are %d SNPs with missing SNP positions, and they have been removed.",
			sum(flag)))
		rv <- hlaHaploSubset(rv, snp.sel=!flag)
	}

	return(rv)
}


#######################################################################
# To select a subset of SNP genotypes
#

hlaGenoSubset <- function(genoobj, samp.sel=NULL, snp.sel=NULL)
{
	# check
	stopifnot(inherits(genoobj, "hlaSNPGenoClass"))
	stopifnot(is.null(samp.sel) | is.logical(samp.sel) | is.integer(samp.sel))
	if (is.logical(samp.sel))
		stopifnot(length(samp.sel) == length(genoobj$sample.id))
	stopifnot(is.null(snp.sel) | is.logical(snp.sel) | is.integer(snp.sel))
	if (is.logical(snp.sel))
		stopifnot(length(snp.sel) == length(genoobj$snp.id))
	if (is.integer(samp.sel))
	{
		stopifnot(!any(is.na(samp.sel)))
		stopifnot(length(unique(samp.sel)) == length(samp.sel))
	}
	if (is.integer(snp.sel))
	{
		stopifnot(!any(is.na(snp.sel)))
		stopifnot(length(unique(snp.sel)) == length(snp.sel))
	}

	# subset
	if (is.null(samp.sel))
		samp.sel <- rep(TRUE, length(genoobj$sample.id))
	if (is.null(snp.sel))
		snp.sel <- rep(TRUE, length(genoobj$snp.id))
	rv <- list(genotype = genoobj$genotype[snp.sel, samp.sel],
		sample.id = genoobj$sample.id[samp.sel],
		snp.id = genoobj$snp.id[snp.sel],
		snp.position = genoobj$snp.position[snp.sel],
		snp.allele = genoobj$snp.allele[snp.sel],
		assembly = genoobj$assembly
	)
	class(rv) <- "hlaSNPGenoClass"
	return(rv)
}


#######################################################################
# To select a subset of SNP haplotypes
#

hlaHaploSubset <- function(haploobj, samp.sel=NULL, snp.sel=NULL)
{
	# check
	stopifnot(inherits(haploobj, "hlaSNPHaploClass"))
	stopifnot(is.null(samp.sel) | is.logical(samp.sel) | is.integer(samp.sel))
	if (is.logical(samp.sel))
		stopifnot(length(samp.sel) == length(haploobj$sample.id))
	stopifnot(is.null(snp.sel) | is.logical(snp.sel) | is.integer(snp.sel))
	if (is.logical(snp.sel))
		stopifnot(length(snp.sel) == length(haploobj$snp.id))
	if (is.integer(samp.sel))
		stopifnot(length(unique(samp.sel)) == length(samp.sel))
	if (is.integer(snp.sel))
		stopifnot(length(unique(snp.sel)) == length(snp.sel))

	# subset
	if (is.null(samp.sel))
		samp.sel <- rep(TRUE, length(haploobj$sample.id))
	if (is.numeric(samp.sel))
	{
		v <- samp.sel
		samp.sel <- rep(FALSE, length(haploobj$sample.id))
		samp.sel[v] <- TRUE
	}
	samp.sel <- rep(samp.sel, each=2)
	if (is.null(snp.sel))
		snp.sel <- rep(TRUE, length(haploobj$snp.id))

	rv <- list(haplotype = haploobj$haplotype[snp.sel, samp.sel],
		sample.id = haploobj$sample.id[samp.sel],
		snp.id = haploobj$snp.id[snp.sel],
		snp.position = haploobj$snp.position[snp.sel],
		snp.allele = haploobj$snp.allele[snp.sel],
		assembly = haploobj$assembly
	)
	class(rv) <- "hlaSNPHaploClass"
	return(rv)
}


#######################################################################
# To select a subset of SNP genotypes
#

hlaHaplo2Geno <- function(hapobj)
{
	stopifnot(inherits(hapobj, "hlaSNPHaploClass"))
	n <- dim(hapobj$haplotype)[2]
	rv <- list(
		genotype = hapobj$haplotype[, seq(1,n,2)] + hapobj$haplotype[, seq(2,n,2)],
		sample.id = hapobj$sample.id,
		snp.id = hapobj$snp.id,
		snp.position = hapobj$snp.position,
		snp.allele = hapobj$snp.allele,
		assembly = hapobj$assembly
	)
	class(rv) <- "hlaSNPGenoClass"
	return(rv)
}


#######################################################################
# To get the overlapping SNPs between target and template with
#   corrected strand.
#

hlaGenoSwitchStrand <- function(target, template,
	match.type=c("RefSNP+Position", "RefSNP", "Position"),
	same.strand=FALSE, verbose=TRUE)
{
	# check
	stopifnot(inherits(target, "hlaSNPGenoClass") |
		inherits(target, "hlaSNPHaploClass"))
	stopifnot(inherits(template, "hlaSNPGenoClass") |
		inherits(template, "hlaSNPHaploClass") |
		inherits(template, "hlaAttrBagClass") |
		inherits(template, "hlaAttrBagObj"))
	stopifnot(is.logical(same.strand))
	stopifnot(is.logical(verbose))
	match.type <- match.arg(match.type)

	# initialize
	s1 <- hlaSNPID(template, match.type)
	s2 <- hlaSNPID(target, match.type)

	flag <- TRUE
	if (length(s1) == length(s2))
	{
		if (all(s1 == s2, na.rm=TRUE))
		{
			s <- s1
			I1 <- I2 <- seq_len(length(s1))
			flag <- FALSE
		}
	}
	if (flag)
	{
		s <- intersect(s1, s2)
		if (length(s) <= 0) stop("There is no common SNP.")
		I1 <- match(s, s1); I2 <- match(s, s2)
	}

	# compute allele frequencies
	if (inherits(template, "hlaSNPGenoClass"))
	{
		template.afreq <- rowMeans(template$genotype, na.rm=TRUE) * 0.5
	} else if (inherits(template, "hlaSNPHaploClass"))
	{
		template.afreq <- rowMeans(template$haplotype, na.rm=TRUE)
	} else {
		template.afreq <- template$snp.allele.freq
	}
	if (inherits(target, "hlaSNPGenoClass"))
	{
		target.afreq <- rowMeans(target$genotype, na.rm=TRUE) * 0.5
	} else {
		target.afreq <- rowMeans(target$haplotype, na.rm=TRUE)
	}

	# call
	gz <- .C("HIBAG_AlleleStrand",
		template$snp.allele, template.afreq, I1,
		target$snp.allele, target.afreq, I2,
		same.strand, length(s), out=logical(length(s)),
		out.n.ambiguity=integer(1), out.n.mismatching=integer(1),
		err=integer(1), NAOK=TRUE, PACKAGE="HIBAG")
	if (gz$err != 0) stop(hlaErrMsg())

	if (verbose)
	{
		# switched allele pairs
		x <- sum(gz$out)
		if (x > 0)
		{
			if (x > 1)
			{
				a <- "are"; s <- "s"
			} else {
				a <- "is"; s <- ""
			}
			cat(sprintf(
				"There %s %d variant%s in total with switched allelic strand order%s.\n",
				a, x, s, s))
		} else {
			cat("No allelic strand orders are switched.\n")
		}

		# the number of ambiguity
		if (gz$out.n.ambiguity > 0)
		{
			if (gz$out.n.ambiguity > 1)
			{
				a <- "are"; s <- "s"
			} else {
				a <- "is"; s <- ""
			}
			cat(sprintf(
				"Due to stand ambiguity (such like C/G), the allelic strand order%s of %d variant%s %s determined by comparing allele frequencies.\n",
				s, gz$out.n.ambiguity, s, a))
		}

		# the number of mismatching
		if (gz$out.n.mismatching > 0)
		{
			if (gz$out.n.mismatching > 1)
			{
				a <- "are"; s <- "s"
			} else {
				a <- "is"; s <- ""
			}
			cat(sprintf(
				"Due to mismatching alleles, the allelic strand order%s of %d variant%s %s determined by comparing allele frequencies.\n",
				s, gz$out.n.mismatching, s, a))
		}
	}

	# result
	if (inherits(target, "hlaSNPGenoClass"))
	{
		geno <- target$genotype[I2, ]
		if (is.vector(geno))
			geno <- matrix(geno, ncol=1)
		for (i in which(gz$out)) geno[i, ] <- 2 - geno[i, ]
		rv <- list(genotype = geno)
		rv$sample.id <- target$sample.id
		rv$snp.id <- target$snp.id[I2]
		rv$snp.position <- target$snp.position[I2]
		rv$snp.allele <- template$snp.allele[I1]
		rv$assembly <- template$assembly
		class(rv) <- "hlaSNPGenoClass"

	} else {
		haplo <- target$haplotype[I2, ]
		if (is.vector(haplo))
			haplo <- matrix(haplo, ncol=1)
		for (i in which(gz$out)) haplo[i, ] <- 1 - haplo[i, ]
		rv <- list(haplotype = haplo)
		rv$sample.id <- target$sample.id
		rv$snp.id <- target$snp.id[I2]
		rv$snp.position <- target$snp.position[I2]
		rv$snp.allele <- template$snp.allele[I1]
		rv$assembly <- template$assembly
		class(rv) <- "hlaSNPHaploClass"
	}

	return(rv)
}


#######################################################################
# To get the information of SNP ID and position
#

hlaSNPID <- function(obj, type=c("RefSNP+Position", "RefSNP", "Position"))
{
	stopifnot( inherits(obj, "hlaSNPGenoClass") |
		inherits(obj, "hlaSNPHaploClass") | inherits(obj, "hlaAttrBagClass") |
		inherits(obj, "hlaAttrBagObj") )
	type <- match.arg(type)
	if (type == "RefSNP+Position")
		paste(obj$snp.id, obj$snp.position, sep="-")
	else if (type == "RefSNP")
		obj$snp.id
	else if (type == "Position")
		obj$snp.position
	else
		invisible()
}


#######################################################################
# To combine two SNP genotype dataset
#

hlaGenoCombine <- function(geno1, geno2,
	match.type=c("RefSNP+Position", "RefSNP", "Position"),
	allele.check=TRUE, same.strand=FALSE, verbose=TRUE)
{
	# check
	stopifnot(inherits(geno1, "hlaSNPGenoClass"))
	stopifnot(inherits(geno2, "hlaSNPGenoClass"))
	stopifnot(is.logical(allele.check))
	stopifnot(is.logical(same.strand))
	stopifnot(is.logical(verbose))
	match.type <- match.arg(match.type)

	if (allele.check)
	{
		tmp2 <- hlaGenoSwitchStrand(geno2, geno1, match.type,
			same.strand, verbose)
		tmp1 <- hlaGenoSubset(geno1, snp.sel=
			match(hlaSNPID(tmp2, match.type), hlaSNPID(geno1, match.type)))
	} else {
		s1 <- hlaSNPID(geno1, match.type)
		s2 <- hlaSNPID(geno2, match.type)
		set <- unique(intersect(s1, s2))
		tmp1 <- hlaGenoSubset(geno1, snp.sel=match(set, s1))
		tmp2 <- hlaGenoSubset(geno2, snp.sel=match(set, s2))
	}

	rv <- list(genotype = cbind(tmp1$genotype, tmp2$genotype),
		sample.id = c(tmp1$sample.id, tmp2$sample.id),
		snp.id = tmp1$snp.id, snp.position = tmp1$snp.position,
		snp.allele = tmp1$snp.allele,
		assembly = tmp1$assembly)
	colnames(rv$genotype) <- NULL
	rownames(rv$genotype) <- NULL

	class(rv) <- "hlaSNPGenoClass"
	return(rv)
}


#######################################################################
# Convert to PLINK PED format
#

hlaGeno2PED <- function(geno, out.fn)
{
	# check
	stopifnot(inherits(geno, "hlaSNPGenoClass"))
	stopifnot(is.character(out.fn))

	# MAP file
	rv <- data.frame(chr=rep(6, length(geno$snp.id)), rs=geno$snp.id,
		morgan=rep(0, length(geno$snp.id)), bp=geno$snp.position,
		stringsAsFactors=FALSE)
	write.table(rv, file=paste(out.fn, ".map", sep=""),
		row.names=FALSE, col.names=FALSE, quote=FALSE)

	# PED file
	n <- length(geno$sample.id)
	m <- matrix("", nrow=n, ncol=2*length(geno$snp.id))
	for (i in 1:length(geno$snp.id))
	{
		allele <- unlist(strsplit(geno$snp.allele[i], "/"))
		g <- geno$genotype[i, ] + 1
		m[, 2*(i-1)+1] <- allele[c(2, 1, 1)[g]]
		m[, 2*(i-1)+2] <- allele[c(2, 2, 1)[g]]
	}
	rv <- cbind(Family=geno$sample.id, Ind=geno$sample.id,
		Paternal=rep(0, n), Maternal=rep(0, n),
		Sex=rep(0, n), Pheno=rep(-9, n), m)
	write.table(rv, file=paste(out.fn, ".ped", sep=""),
		row.names=FALSE, col.names=FALSE, quote=FALSE)

	# return
	return(invisible(NULL))
}


#######################################################################
# Convert from PLINK BED format
#

hlaBED2Geno <- function(bed.fn, fam.fn, bim.fn, rm.invalid.allele=FALSE,
	import.chr="xMHC", assembly=c("auto", "hg18", "hg19", "unknown"),
	verbose=TRUE)
{
	# check
	stopifnot(is.character(bed.fn) & (length(bed.fn)==1))
	stopifnot(is.character(fam.fn) & (length(fam.fn)==1))
	stopifnot(is.character(bim.fn) & (length(bim.fn)==1))
	stopifnot(is.character(import.chr))
	stopifnot(is.logical(rm.invalid.allele) & (length(rm.invalid.allele)==1))
	stopifnot(is.logical(verbose) & (length(verbose)==1))

	assembly <- match.arg(assembly)
	if (assembly == "auto")
	{
		message("using the default genome assembly (assembly=\"hg19\")")
		warning("Please explicitly specify the argument 'assembly=\"hg18\"' or 'assembly=\"hg19\"'.")
		assembly <- "hg19"
	} else if (assembly == "unknown")
	{
		warning("Please explicitly specify the argument 'assembly=\"hg18\"' or 'assembly=\"hg19\"'.")
	}

	# detect bed.fn
	bed <- .C("HIBAG_BEDFlag", bed.fn, snporder=integer(1), err=integer(1),
		NAOK=TRUE, PACKAGE="HIBAG")
	if (bed$err != 0) stop(hlaErrMsg())
	if (verbose)
	{
		cat("Open \"", bed.fn, sep="")
		if (bed$snporder == 0)
			cat("\" in the individual-major mode.\n")
		else
			cat("\" in the SNP-major mode.\n")
	}

	# read fam.fn
	famD <- read.table(fam.fn, header=FALSE, stringsAsFactors=FALSE)
	names(famD) <- c("FamilyID", "InvID", "PatID", "MatID", "Sex", "Pheno")
	if (length(unique(famD$InvID)) == dim(famD)[1])
	{
		sample.id <- famD$InvID
	} else {
		sample.id <- paste(famD$FamilyID, famD$InvID, sep="-")
		if (length(unique(sample.id)) != dim(famD)[1])
			stop("IDs in PLINK bed are not unique!")
	}
	if (verbose)
		cat("Open \"", fam.fn, "\".\n", sep="")

	# read bim.fn
	bimD <- read.table(bim.fn, header=FALSE, stringsAsFactors=FALSE)
	names(bimD) <- c("chr", "snp.id", "map", "pos", "allele1", "allele2")

	# chromosome
	chr <- bimD$chr; chr[is.na(chr)] <- ""
	# position
	snp.pos <- bimD$pos
	snp.pos[!is.finite(snp.pos)] <- 0

	# snp.id
	snp.id <- bimD$snp.id
	if (length(snp.id) != length(unique(snp.id)))
		stop("The SNP IDs in the PLINK binary file should be unique!")

	# snp allele
	snp.allele <- paste(bimD$allele1, bimD$allele2, sep="/")
	if (verbose)
		cat("Open \"", bim.fn, "\".\n", sep="")

	# SNP selection
	if (length(import.chr) == 1)
	{
		if (import.chr == "xMHC")
		{
			if (assembly %in% c("hg18", "NCBI36"))
			{
				snp.flag <- (chr==6) & (25759242<=snp.pos) & (snp.pos<=33534827)
			} else if (assembly %in% c("hg19", "NCBI37"))
			{
				snp.flag <- (chr==6) & (25651242<=snp.pos) & (snp.pos<=33544122)
			} else {
				stop("Invalid genome assembly.")
			}
			n.snp <- as.integer(sum(snp.flag))
			if (verbose)
			{
				cat(sprintf("Import %d SNPs within the xMHC region on chromosome 6.\n",
					n.snp))
			}
			import.chr <- NULL
		} else if (import.chr == "")
		{
			n.snp <- length(snp.id)
			snp.flag <- rep(TRUE, n.snp)
			if (verbose)
				cat(sprintf("Import %d SNPs.\n", n.snp))
			import.chr <- NULL
		}
	}
	if (!is.null(import.chr))
	{
		snp.flag <- (chr %in% import.chr) & (snp.pos>0)
		n.snp <- as.integer(sum(snp.flag))
		if (verbose)
		{
			cat(sprintf("Import %d SNPs from chromosome %s.\n", n.snp,
				paste(import.chr, collapse=",")))
		}
	}
	if (n.snp <= 0) stop("There is no SNP imported.")

	# call the C function
	rv <- .C("HIBAG_ConvBED", bed.fn, length(sample.id), length(snp.id), n.snp,
		(bed$snporder==0), snp.flag, verbose,
		geno = matrix(as.integer(0), nrow=n.snp, ncol=length(sample.id)),
		err=integer(1), NAOK=TRUE, PACKAGE="HIBAG")
	if (rv$err != 0) stop(hlaErrMsg())

	# result
	v <- list(genotype = rv$geno, sample.id = sample.id, snp.id = snp.id[snp.flag],
		snp.position = snp.pos[snp.flag], snp.allele = snp.allele[snp.flag],
		assembly = assembly)
	class(v) <- "hlaSNPGenoClass"

	# remove invalid snps
	if (rm.invalid.allele)
	{
		# check duplicated SNP ID
		flag <- duplicated(v$snp.id)
		if (any(flag))
		{
			if (verbose)
			{
				cat(sprintf("%d SNPs with duplicated ID have been removed.\n",
					sum(flag)))
			}
			v <- hlaGenoSubset(v, snp.sel=!flag)
		}

		# check invalid alleles
		snp.allele <- v$snp.allele
		snp.allele[is.na(snp.allele)] <- "?/?"
		flag <- sapply(strsplit(snp.allele, "/"),
			function(x)
			{
				if (length(x) == 2)
				{
					all(x %in% c("A", "G", "C", "T"))
				} else {
					FALSE
				}
			}
		)
		if (any(!flag) & verbose)
			cat(sprintf("%d SNPs with invalid alleles have been removed.\n", sum(!flag)))

		# get a subset
		v <- hlaGenoSubset(v, snp.sel=flag)
	}

	return(v)
}


#######################################################################
# Convert from SNP GDS format (SNPRelate)
#

hlaGDS2Geno <- function(gds.fn, rm.invalid.allele=FALSE,
	import.chr="xMHC", assembly=c("auto", "hg18", "hg19", "unknown"),
	verbose=TRUE)
{
	# library
	eval(parse(text='
		if (!require(SNPRelate))
			stop("The SNPRelate package should be installed.")
	'))

	# check
	stopifnot(is.character(gds.fn) & is.vector(gds.fn))
	stopifnot(length(gds.fn) == 1)

	stopifnot(is.logical(rm.invalid.allele) & is.vector(rm.invalid.allele))
	stopifnot(length(rm.invalid.allele) == 1)

	stopifnot(is.character(import.chr))

	stopifnot(is.logical(verbose) & is.vector(verbose))
	stopifnot(length(verbose) == 1)

	assembly <- match.arg(assembly)
	if (assembly == "auto")
	{
		message("using the default genome assembly (assembly=\"hg19\")")
		warning("Please explicitly specify the argument 'assembly=\"hg18\"' or 'assembly=\"hg19\"'.")
		assembly <- "hg19"
	} else if (assembly == "unknown")
	{
		warning("Please explicitly specify the argument 'assembly=\"hg18\"' or 'assembly=\"hg19\"'.")
	}


	####  open the GDS SNP file  ####

	chr <- NULL
	snp.pos <- NULL
	snp.id <- NULL

	eval(parse(text='gfile <- snpgdsOpen(gds.fn)'))
	on.exit(eval(parse(text='snpgdsClose(gfile)')))

	# snp.id
	eval(parse(text='
		snp.id <- read.gdsn(index.gdsn(gfile, "snp.id"))
		if (!is.null(index.gdsn(gfile, "snp.rs.id", silent=TRUE)))
		{
			snp.rsid <- read.gdsn(index.gdsn(gfile, "snp.rs.id"))
		} else
			snp.rsid <- snp.id
	'))

	# chromosome
	eval(parse(text='
		chr <- read.gdsn(index.gdsn(gfile, "snp.chromosome"))
	'))

	# position
	eval(parse(text='
		snp.pos <- read.gdsn(index.gdsn(gfile, "snp.position"))
		snp.pos[!is.finite(snp.pos)] <- 0
	'))

	# SNP selection
	if (length(import.chr) == 1)
	{
		if (import.chr == "xMHC")
		{
			if (assembly %in% c("hg18", "NCBI36"))
			{
				snp.flag <- (chr==6) & (25759242<=snp.pos) & (snp.pos<=33534827)
			} else if (assembly %in% c("hg19", "NCBI37"))
			{
				snp.flag <- (chr==6) & (25651242<=snp.pos) & (snp.pos<=33544122)
			} else {
				stop("Invalid genome assembly.")
			}
			n.snp <- as.integer(sum(snp.flag))
			if (verbose)
			{
				cat(sprintf("Import %d SNPs within the xMHC region on chromosome 6.\n",
					n.snp))
			}
			import.chr <- NULL
		} else if (import.chr == "")
		{
			n.snp <- length(snp.id)
			snp.flag <- rep(TRUE, n.snp)
			if (verbose)
				cat(sprintf("Import %d SNPs.\n", n.snp))
			import.chr <- NULL
		}
	}
	if (!is.null(import.chr))
	{
		snp.flag <- (chr %in% import.chr) & (snp.pos>0)
		n.snp <- as.integer(sum(snp.flag))
		if (verbose)
		{
			cat(sprintf("Import %d SNPs from chromosome %s.\n", n.snp,
				paste(import.chr, collapse=",")))
		}
	}
	if (n.snp <= 0) stop("There is no SNP imported.")

	# result
	eval(parse(text='
		v <- list(genotype = snpgdsGetGeno(gfile, snp.id=snp.id[snp.flag],
				snpfirstdim=TRUE, verbose=FALSE),
			sample.id = read.gdsn(index.gdsn(gfile, "sample.id")),
			snp.id = snp.rsid[snp.flag],
			snp.position = snp.pos[snp.flag],
			snp.allele = read.gdsn(index.gdsn(gfile, "snp.allele"))[snp.flag],
			assembly = assembly)
	'))
	class(v) <- "hlaSNPGenoClass"

	# remove invalid snps
	if (rm.invalid.allele)
	{
		# check duplicated SNP ID
		flag <- duplicated(v$snp.id)
		if (any(flag))
		{
			if (verbose)
			{
				cat(sprintf("%d SNPs with duplicated ID have been removed.\n",
					sum(flag)))
			}
			v <- hlaGenoSubset(v, snp.sel=!flag)
		}

		# check invalid alleles
		snp.allele <- v$snp.allele
		snp.allele[is.na(snp.allele)] <- "?/?"
		flag <- sapply(strsplit(snp.allele, "/"),
			function(x)
			{
				if (length(x) == 2)
				{
					all(x %in% c("A", "G", "C", "T"))
				} else {
					FALSE
				}
			}
		)
		if (any(!flag) & verbose)
		{
			cat(sprintf("%d SNPs with invalid alleles have been removed.\n",
				sum(!flag)))
		}
		v <- hlaGenoSubset(v, snp.sel=flag)
	}

	return(v)
}



#######################################################################
# Summarize a "hlaSNPGenoClass" object
#

summary.hlaSNPGenoClass <- function(object, show=TRUE, ...)
{
	# check
	stopifnot(inherits(object, "hlaSNPGenoClass"))
	geno <- object

	fn <- function(x)
	{
		sprintf("min: %g, max: %g, mean: %g, median: %g, sd: %g",
			min(x, na.rm=TRUE), max(x, na.rm=TRUE),
			mean(x, na.rm=TRUE), median(x, na.rm=TRUE), sd(x, na.rm=TRUE))
	}

	rv <- list(mr.snp = hlaGenoMRate(geno), mr.samp = hlaGenoMRate_Samp(geno),
		maf = hlaGenoMFreq(geno),
		allele = table(geno$snp.allele))

	if (show)
	{
		cat("SNP genotypes: \n")
		cat(sprintf("\t%d samples X %d SNPs\n",
			length(geno$sample.id), length(geno$snp.id)))
		cat(sprintf("\tSNPs range from %dbp to %dbp",
			min(geno$snp.position, na.rm=TRUE),
			max(geno$snp.position, na.rm=TRUE)))
		if (!is.null(geno$assembly))
			cat(" on ", geno$assembly, "\n", sep="")
		else
			cat("\n")

		# missing rate for SNP
		cat(sprintf("Missing rate per SNP:\n\t%s\n", fn(rv$mr.snp)))
		# missing rate for sample
		cat(sprintf("Missing rate per sample:\n\t%s\n", fn(rv$mr.samp)))

		# minor allele frequency
		cat(sprintf("Minor allele frequency:\n\t%s\n", fn(rv$maf)))

		# allele information
		cat("Allelic information:")
		print(rv$allele)
	}

	# return
	return(invisible(rv))
}


#######################################################################
# Summarize a "hlaSNPHaploClass" object
#

summary.hlaSNPHaploClass <- function(object, show=TRUE, ...)
{
	# check
	stopifnot(inherits(object, "hlaSNPHaploClass"))
	haplo <- object

	fn <- function(x)
	{
		sprintf("min: %g, max: %g, mean: %g, median: %g, sd: %g",
			min(x, na.rm=TRUE), max(x, na.rm=TRUE),
			mean(x, na.rm=TRUE), median(x, na.rm=TRUE), sd(x, na.rm=TRUE))
	}

	rv <- list(mr.snp = hlaGenoMRate(haplo), mr.samp = hlaGenoMRate_Samp(haplo),
		maf = hlaGenoMFreq(haplo),
		allele = table(haplo$snp.allele))

	if (show)
	{
		cat("SNP Haplotypes: \n")
		cat(sprintf("\t%d samples X %d SNPs\n",
			length(haplo$sample.id), length(haplo$snp.id)))
		cat(sprintf("\tSNPs range from %dbp to %dbp",
			min(haplo$snp.position, na.rm=TRUE), max(haplo$snp.position, na.rm=TRUE)))
		if (!is.null(haplo$assembly))
			cat(" on ", haplo$assembly, "\n", sep="")
		else
			cat("\n")

		# missing rate for SNP
		cat(sprintf("Missing rate per SNP:\n\t%s\n", fn(rv$mr.snp)))
		# missing rate for sample
		cat(sprintf("Missing rate per sample:\n\t%s\n", fn(rv$mr.samp)))

		# minor allele frequency
		cat(sprintf("Minor allele frequency:\n\t%s\n", fn(rv$maf)))

		# allele information
		cat("Allelic information:")
		print(rv$allele)
	}

	# return
	return(invisible(rv))
}




#######################################################################
#
# the function list for genotypes and haplotypes
#
#######################################################################

#######################################################################
# To the allele frequencies from genotypes or haplotypes
#

hlaGenoAFreq <- function(obj)
{
	# check
	stopifnot(inherits(obj, "hlaSNPGenoClass") |
		inherits(obj, "hlaSNPHaploClass"))
    if (inherits(obj, "hlaSNPGenoClass"))
    {
        rowMeans(obj$genotype, na.rm=TRUE) * 0.5
    } else {
        rowMeans(obj$haplotype, na.rm=TRUE)
    }
}


#######################################################################
# To the minor allele frequencies from genotypes or haplotypes
#

hlaGenoMFreq <- function(obj)
{
	# check
	stopifnot(inherits(obj, "hlaSNPGenoClass") |
		inherits(obj, "hlaSNPHaploClass"))
    if (inherits(obj, "hlaSNPGenoClass"))
    {
        F <- rowMeans(obj$genotype, na.rm=TRUE) * 0.5
    } else {
        F <- rowMeans(obj$haplotype, na.rm=TRUE)
    }
    pmin(F, 1-F)
}


#######################################################################
# To the missing rates from genotypes or haplotypes per SNP
#

hlaGenoMRate <- function(obj)
{
	# check
	stopifnot(inherits(obj, "hlaSNPGenoClass") |
		inherits(obj, "hlaSNPHaploClass"))
    if (inherits(obj, "hlaSNPGenoClass"))
    {
		rowMeans(is.na(obj$genotype))
	} else {
		rowMeans(is.na(obj$haplotype))
	}
}


#######################################################################
# To the missing rates from genotypes or haplotypes per sample
#

hlaGenoMRate_Samp <- function(obj)
{
	# check
	stopifnot(inherits(obj, "hlaSNPGenoClass") |
		inherits(obj, "hlaSNPHaploClass"))
    if (inherits(obj, "hlaSNPGenoClass"))
    {
        colMeans(is.na(obj$genotype))
    } else {
        F <- colMeans(is.na(obj$haplotype))
        F[seq(1, length(F), 2)] + F[seq(2, length(F), 2)]
    }
}






#######################################################################
#
# the function list for HLA types
#
#######################################################################


#######################################################################
# To get the starting and ending positions in basepair for HLA loci
#

hlaLociInfo <- function(assembly =
	c("auto", "auto-silent", "hg18", "hg19", "unknown"))
{
	# check
	assembly <- match.arg(assembly)
	if (assembly == "auto")
	{
		message("using the default genome assembly (assembly=\"hg19\")")
		assembly <- "hg19"
	} else if (assembly == "auto-silent")
	{
		assembly <- "hg19"
	}

	if (assembly == "hg18")
	{
		# the name of HLA genes
		ID <- c("A", "B", "C", "DRB1", "DRB5", "DQA1", "DQB1", "DPB1", "any")

		# starting position
		pos.HLA.start <- as.integer(c(30018310, 31429628, 31344508, 32654527,
			32593129, 32713161, 32735635, 33151738, NA))

		# ending position
		pos.HLA.end <- as.integer(c(30021633, 31432914, 31347834, 32665559,
			32605984, 32719407, 32742444, 33162954, NA))
	} else if (assembly == "hg19")
	{
		# http://atlasgeneticsoncology.org/Genes/GC_HLA-A.html

		# the name of HLA genes
		ID <- c(
			# HLA Classic I
			"A", "B", "C", "E", "F", "G",
			# HLA Classic II
			"DMA", "DMB", "DOA", "DOB", "DRA", "DRB1", "DRB5",
			"DQA1", "DQB1", "DPA1", "DPB1",
			# HLA Classic III
			"BF", "C4A", "C4B", "HSPA1A", "HSPA1B", "HSPA1L", "LTA", "LTB", "TNF",
			"any")

		# starting position
		pos.HLA.start <- as.integer(c(
			29910247,  # A
			31321649,  # B
			31236526,  # C
			30457183,  # E
			29691117,  # F
			29794756,  # G
			32916390,  # DMA
			32902406,  # DMB
			32971960,  # DOA
			32780540,  # DOB
			32407619,  # DRA
			32546547,  # DRB1
			32485154,  # DRB5
			32605183,  # DQA1
			32627241,  # DQB1
			33032346,  # DPA1
			33043703,  # DPB1
			31913721,  # BF
			31949834,  # C4A
			31949834,  # C4B
			31783291,  # HSPA1A
			31795512,  # HSPA1B
			31777396,  # HSPA1L
			31540071,  # LTA
			31548336,  # LTB
			31543344,  # TNF
			NA))

		# ending position
		pos.HLA.end <- as.integer(c(
			29913661,  # A
			31324989,  # B
			31239913,  # C
			30461982,  # E
			29694303,  # F
			29798899,  # G
			32936871,  # DMA
			32908847,  # DMB
			32977389,  # DOA
			32784825,  # DOB
			32412826,  # DRA
			32557613,  # DRB1
			32498006,  # DRB5
			32611429,  # DQA1
			32634466,  # DQB1
			33048555,  # DPA1
			33057473,  # DPB1
			31919861,  # BF
			31970457,  # C4A
			31970458,  # C4B
			31785719,  # HSPA1A
			31798031,  # HSPA1B
			31782835,  # HSPA1L
			31542100,  # LTA
			31550202,  # LTB
			31546112,  # TNF
			NA))
	} else {
		stop("Unknown human genome reference in 'assembly'!")
	}

	# length in basepair
	length.HLA <- (pos.HLA.end - pos.HLA.start + 1L)

	# the names of HLA genes
	names(pos.HLA.start) <- ID
	names(pos.HLA.end) <- ID
	names(length.HLA) <- ID

	# return
	return(list(loci = ID,
		pos.HLA.start = pos.HLA.start, pos.HLA.end = pos.HLA.end,
		length.HLA = length.HLA,
		assembly = assembly))
}


#######################################################################
# Limit the resolution of HLA alleles
#

hlaAlleleDigit <- function(obj, max.resolution="4-digit", rm.suffix=FALSE)
{
	# check
	stopifnot(inherits(obj, "hlaAlleleClass") | is.character(obj))
	stopifnot(is.logical(rm.suffix))
	if (is.character(obj))
		stopifnot(is.vector(obj))
	stopifnot(max.resolution %in% c("2-digit", "4-digit", "6-digit", "8-digit",
		"allele", "protein", "2", "4", "6", "8", "full", ""))

	if (!(max.resolution %in% c("full", "")))
	{
		if (is.character(obj))
		{
			len <- c(1, 2, 3, 4, 1, 2, 1, 2, 3, 4)
			names(len) <- c("2-digit", "4-digit", "6-digit", "8-digit",
				"allele", "protein", "2", "4", "6", "8")
			maxlen <- len[[as.character(max.resolution)]]

			obj <- sapply(strsplit(obj, ":"), FUN =
					function(s, idx) {
						if (any(is.na(s)))
						{
							NA
						} else {
							if (length(idx) < length(s)) s <- s[idx]
							if (length(idx) == length(s))
							{
								if (rm.suffix & (nchar(s[length(s)])>0))
								{
									z <- unlist(strsplit(s[length(s)], ""))
									for (i in 1:length(z))
									{
										if (!(z[i] %in% as.character(0:9)))
										{
											if (i > 1)
												z <- z[1:(i-1)]
											break
										}
									}
									s[length(s)] <- paste(z, collapse="")
								}
							}
							paste(s, collapse=":")
						}
					},
				idx = 1:maxlen)
		} else {
			rv <- list(locus = obj$locus,
				pos.start = obj$pos.start, pos.end = obj$pos.end,
				value = data.frame(sample.id = obj$value$sample.id,
					allele1 = hlaAlleleDigit(obj$value$allele1, max.resolution),
					allele2 = hlaAlleleDigit(obj$value$allele2, max.resolution),
					stringsAsFactors=FALSE),
				assembly = obj$assembly
			)
			if ("prob" %in% names(obj$value))
				rv$value$prob <- obj$value$prob
			class(rv) <- "hlaAlleleClass"
			obj <- rv
		}
	}

	return(obj)
}


#######################################################################
# Get unique HLA alleles
#

hlaUniqueAllele <- function(hla)
{
	# check
	stopifnot(is.character(hla) | inherits(hla, "hlaAlleleClass"))

	if (is.character(hla))
	{
		hla <- hla[!is.na(hla)]
		hla <- unique(hla)
		rv <- .C("HIBAG_SortAlleleStr", length(hla), hla,
			out = character(length(hla)),
			err = integer(1), NAOK = TRUE, PACKAGE = "HIBAG")
		if (rv$err != 0) stop(hlaErrMsg())
		rv$out
	} else {
		hlaUniqueAllele(as.character(c(hla$value$allele1, hla$value$allele2)))
	}
}


#######################################################################
# To make a class of HLA alleles
#

hlaAllele <- function(sample.id, H1, H2, max.resolution="", locus="any",
	assembly=c("auto", "auto-silent", "hg18", "hg19", "unknown"),
	locus.pos.start=NA, locus.pos.end=NA, prob=NULL, na.rm=TRUE)
{
	# check
	stopifnot(is.vector(sample.id))
	stopifnot(is.vector(H1) & is.character(H1))
	stopifnot(is.vector(H2) & is.character(H2))
	stopifnot(length(sample.id) == length(H1))
	stopifnot(length(sample.id) == length(H2))
	stopifnot(max.resolution %in% c("2-digit", "4-digit", "6-digit",
		"8-digit", "allele", "protein", "2", "4", "6", "8", "full", ""))

	HLAinfo <- hlaLociInfo(assembly)
	if (!is.null(prob))
		stopifnot(length(sample.id) == length(prob))

	# build
	H1[H1 == ""] <- NA
	H1 <- hlaAlleleDigit(H1, max.resolution)
	H2[H2 == ""] <- NA
	H2 <- hlaAlleleDigit(H2, max.resolution)

	if (locus %in% names(HLAinfo$pos.HLA.start))
	{
		if (!is.finite(locus.pos.start))
			locus.pos.start <- HLAinfo$pos.HLA.start[[locus]]
		if (!is.finite(locus.pos.end))
			locus.pos.end <- HLAinfo$pos.HLA.end[[locus]]
	}

	# remove missing values
	if (na.rm)
		flag <- (!is.na(H1)) & (!is.na(H2))
	else
		flag <- rep(TRUE, length(sample.id))

	# result
	rv <- list(locus = locus,
		pos.start = locus.pos.start, pos.end = locus.pos.end,
		value = data.frame(sample.id = sample.id[flag],
			allele1 = H1[flag], allele2 = H2[flag],
			stringsAsFactors=FALSE),
		assembly = HLAinfo$assembly
	)
	if (!is.null(prob))
		rv$value$prob <- prob[flag]
	class(rv) <- "hlaAlleleClass"
	return(rv)
}


#######################################################################
# To make a class of HLA alleles
#

hlaAlleleSubset <- function(hla, samp.sel=NULL)
{
	# check
	stopifnot(inherits(hla, "hlaAlleleClass"))
	stopifnot(is.null(samp.sel) | is.logical(samp.sel) | is.integer(samp.sel))
	if (is.logical(samp.sel))
		stopifnot(length(samp.sel) == dim(hla$value)[1])
	if (is.integer(samp.sel))
		stopifnot(length(unique(samp.sel)) == length(samp.sel))

	# result
	if (is.null(samp.sel))
		samp.sel <- rep(TRUE, dim(hla$value)[1])
	rv <- list(locus = hla$locus,
		pos.start = hla$pos.start, pos.end = hla$pos.end,
		value = hla$value[samp.sel, ],
		assembly = hla$assembly
	)

	if (!is.null(hla$postprob))
	{
		rv$postprob <- hla$postprob[, samp.sel]
		if (is.vector(rv$postprob))
		{
			rv$postprob <- matrix(rv$postprob, ncol=1)
		}
	}

	class(rv) <- "hlaAlleleClass"
	return(rv)
}


#######################################################################
# To combine two classes of HLA alleles
#

hlaCombineAllele <- function(H1, H2)
{
	# check
	stopifnot(inherits(H1, "hlaAlleleClass"))
	stopifnot(inherits(H2, "hlaAlleleClass"))
	stopifnot(length(intersect(H1$sample.id, H2$sample.id)) == 0)
	stopifnot(H1$locus == H2$locus)
	stopifnot(H1$pos.start == H2$pos.start)
	stopifnot(H1$pos.end == H2$pos.end)

	id <- c("sample.id", "allele1", "allele2")

	# result
	rv <- list(locus = H1$locus,
		pos.start = H1$pos.start, pos.end = H1$pos.end,
		value = rbind(H1$value[, id], H2$value[, id]),
		assembly = H1$assembly
	)
	rownames(rv$value) <- NULL
	if (!is.null(H1$value$prob) & !is.null(H2$value$prob))
	{
		rv$value$prob <- c(H1$value$prob, H2$value$prob)
	}

	if (!is.null(H1$postprob) & !is.null(H2$postprob))
	{
		rv$postprob <- cbind(H1$postprob, H2$postprob)
	}

	class(rv) <- "hlaAlleleClass"
	return(rv)
}


#######################################################################
# To compare HLA alleles
#

hlaCompareAllele <- function(TrueHLA, PredHLA, allele.limit=NULL,
	call.threshold=NaN, max.resolution="", output.individual=FALSE,
	verbose=TRUE)
{
	# check
	stopifnot(inherits(TrueHLA, "hlaAlleleClass"))
	stopifnot(inherits(PredHLA, "hlaAlleleClass"))
	stopifnot(is.null(allele.limit) |
		(is.vector(allele.limit) & is.character(allele.limit)) |
		inherits(allele.limit, "hlaAttrBagClass") |
		inherits(allele.limit, "hlaAttrBagObj"))
	stopifnot(max.resolution %in% c("2-digit", "4-digit", "6-digit",
		"8-digit", "allele", "protein", "2", "4", "6", "8", "full", ""))
	stopifnot(is.logical(output.individual))
	stopifnot(is.logical(verbose))

	# get the common samples
	samp <- intersect(TrueHLA$value$sample.id, PredHLA$value$sample.id)
	if ((length(samp) != length(TrueHLA$value$sample.id)) |
		(length(samp) != length(PredHLA$value$sample.id)))
	{
		if (verbose)
		{
			message("Calling 'hlaCompareAllele': there are ", length(samp),
				" individuals in common.\n")
		}
	}
	# True HLA
	flag <- match(samp, TrueHLA$value$sample.id)
	if (length(samp) != length(TrueHLA$value$sample.id))
	{
		TrueHLA <- hlaAlleleSubset(TrueHLA, flag)
	} else {
		if (!all(flag == 1:length(TrueHLA$value$sample.id)))
			TrueHLA <- hlaAlleleSubset(TrueHLA, flag)
	}
	# Predicted HLA
	flag <- match(samp, PredHLA$value$sample.id)
	if (length(samp) != length(PredHLA$value$sample.id))
	{
		PredHLA <- hlaAlleleSubset(PredHLA, flag)
	} else {
		if (!all(flag == 1:length(PredHLA$value$sample.id)))
			PredHLA <- hlaAlleleSubset(PredHLA, flag)
	}

	# init
	flag <- !is.na(TrueHLA$value$allele1) & !is.na(TrueHLA$value$allele2) &
		!is.na(PredHLA$value$allele1) & !is.na(PredHLA$value$allele2)
	ts1 <- TrueHLA$value$allele1[flag]; ts2 <- TrueHLA$value$allele2[flag]
	ps1 <- PredHLA$value$allele1[flag]; ps2 <- PredHLA$value$allele2[flag]
	samp.id <- TrueHLA$value$sample.id[flag]

	# call threshold
	if (is.finite(call.threshold))
	{
		prob <- PredHLA$value$prob
		if (!is.null(prob)) prob <- prob[flag]
	} else {
		prob <- NULL
	}

	# allele limitation
	if (!is.null(allele.limit))
	{
		if (inherits(allele.limit, "hlaAttrBagClass") |
			inherits(allele.limit, "hlaAttrBagObj"))
		{
			allele <- hlaUniqueAllele(allele.limit$hla.allele)
			TrainFreq <- allele.limit$hla.freq
			TrainNum <- allele.limit$n.samp
		} else {
			allele <- hlaUniqueAllele(as.character(allele.limit))
			TrainFreq <- NULL
			TrainNum <- NaN
		}
	} else {
		allele <- hlaUniqueAllele(c(ts1, ts2))
		TrainFreq <- NULL
		TrainNum <- NaN
	}

	# max resolution
	if (!(max.resolution %in% c("full", "")))
	{
		ts1 <- hlaAlleleDigit(ts1, max.resolution)
		ts2 <- hlaAlleleDigit(ts2, max.resolution)
		ps1 <- hlaAlleleDigit(ps1, max.resolution)
		ps2 <- hlaAlleleDigit(ps2, max.resolution)
		tmp <- hlaAlleleDigit(allele, max.resolution)
		allele <- hlaUniqueAllele(tmp)
		if ((length(tmp) != length(allele)) & !is.null(TrainFreq))
		{
			x <- rep(0, length(allele))
			for (i in 1:length(allele))
				x[i] <- sum(TrainFreq[tmp == allele[i]])
			TrainFreq <- x
		}
	}

	# allele filter
	flag <- (ts1 %in% allele) & (ts2 %in% allele)
	ts1 <- ts1[flag]; ts2 <- ts2[flag]
	ps1 <- ps1[flag]; ps2 <- ps2[flag]
	samp.id <- samp.id[flag]
	if (!is.null(prob)) prob <- prob[flag]

	# init ...
	cnt.ind <- 0; cnt.haplo <- 0; cnt.call <- 0
	n <- length(ts1)
	m <- length(allele)

	TrueNum <- rep(0, m); names(TrueNum) <- allele
	TrueNumAll <- rep(0, m); names(TrueNumAll) <- allele
	PredNum <- rep(0, m+1); names(PredNum) <- c(allele, "...")
	confusion <- matrix(0.0, nrow = m+1, ncol = m,
		dimnames = list(Predict=names(PredNum), True=names(TrueNum)))
	WrongTab <- NULL

	# for PredNum
	fn <- function(x, LT)
		{ if (x %in% LT) { return(x) } else { return("...") } }

	acc.array <- rep(NaN, n)
	ind.truehla <- character(n)
	ind.predhla <- character(n)

	if (n > 0)
	{
		for (i in 1:n)
		{
			# increase
			TrueNumAll[[ ts1[i] ]] <- TrueNumAll[[ ts1[i] ]] + 1
			TrueNumAll[[ ts2[i] ]] <- TrueNumAll[[ ts2[i] ]] + 1

			# probability cut-off
			if (is.null(prob))
				flag <- TRUE
			else
				flag <- (prob[i] >= call.threshold)
			if (flag)
			{
				# update TrueNum and PredNum
				TrueNum[[ ts1[i] ]] <- TrueNum[[ ts1[i] ]] + 1
				TrueNum[[ ts2[i] ]] <- TrueNum[[ ts2[i] ]] + 1
				PredNum[[ fn(ps1[i], allele) ]] <- PredNum[[ fn(ps1[i], allele) ]] + 1
				PredNum[[ fn(ps2[i], allele) ]] <- PredNum[[ fn(ps2[i], allele) ]] + 1

				# correct count of individuals
				if ( ((ts1[i]==ps1[i]) & (ts2[i]==ps2[i])) |
					((ts2[i]==ps1[i]) & (ts1[i]==ps2[i])) )
				{
					cnt.ind <- cnt.ind + 1
				}

				# correct count of haplotypes
				s <- c(ts1[i], ts2[i]); p <- c(ps1[i], ps2[i])
				ind.truehla[i] <- paste(s[order(s)], collapse="/")
				ind.predhla[i] <- paste(p[order(p)], collapse="/")
				
				hnum <- 0
				if ((s[1]==p[1]) | (s[1]==p[2]))
				{
					if (s[1]==p[1]) { p[1] <- "" } else { p[2] <- "" }
					confusion[s[1], s[1]] <- confusion[s[1], s[1]] + 1
					cnt.haplo <- cnt.haplo + 1
					hnum <- hnum + 1
				}
				if ((s[2]==p[1]) | (s[2]==p[2]))
				{
					confusion[s[2], s[2]] <- confusion[s[2], s[2]] + 1
					cnt.haplo <- cnt.haplo + 1
					hnum <- hnum + 1
				}
				acc.array[i] <- 0.5*hnum

				# for confusion matrix
				s <- c(ts1[i], ts2[i]); p <- c(ps1[i], ps2[i])
				if (hnum == 1)
				{
					if ((s[1]==p[1]) | (s[1]==p[2]))
					{
						if (s[1]==p[1])
						{
							confusion[fn(p[2], allele), s[2]] <-
								confusion[fn(p[2], allele), s[2]] + 1
						} else {
							confusion[fn(p[1], allele), s[2]] <-
								confusion[fn(p[1], allele), s[2]] + 1
						}
					} else {
						if (s[2]==p[1])
						{
							confusion[fn(p[2], allele), s[1]] <-
								confusion[fn(p[2], allele), s[1]] + 1
						} else {
							confusion[fn(p[1], allele), s[1]] <-
								confusion[fn(p[1], allele), s[1]] + 1
						}
					}
				} else if (hnum == 0)
				{
					WrongTab <- cbind(WrongTab,
						c(s, fn(p[1], allele), fn(p[2], allele)))
				}

				# the number of calling
				cnt.call <- cnt.call + 1
			}
		}
	}

	# overall
	overall <- data.frame(total.num.ind = n,
		crt.num.ind = cnt.ind, crt.num.haplo = cnt.haplo,
		acc.ind = cnt.ind/cnt.call, acc.haplo = 0.5*cnt.haplo/cnt.call,
		call.threshold = call.threshold)
	if (is.finite(call.threshold))
	{
		overall$n.call <- cnt.call
		overall$call.rate <- cnt.call / n
	} else {
		overall$n.call <- n
		overall$call.rate <- 1.0
		overall$call.threshold <- 0
	}

	# confusion matrix
	if (is.null(WrongTab))
	{
		nw <- as.integer(0)
	} else {
		nw <- ncol(WrongTab)
	}
	rv <- .C("HIBAG_Confusion", as.integer(m), confusion,
		nw, match(WrongTab, names(PredNum)) - as.integer(1),
		out = matrix(0.0, nrow=m+1, ncol=m, dimnames=
			list(Predict=names(PredNum), True=names(TrueNum))),
		tmp = double((m+1)*m),
		err = integer(1), NAOK = TRUE, PACKAGE = "HIBAG")
	if (rv$err != 0) stop(hlaErrMsg())
	confusion <- round(rv$out, 2)

	# detail -- sensitivity and specificity
	detail <- data.frame(allele = allele, stringsAsFactors=FALSE)
	if (!is.null(TrainFreq))
	{
		detail$train.num <- 2 * TrainFreq * TrainNum
		detail$train.freq <- TrainFreq
	}
	detail$valid.num <- TrueNumAll
	detail$valid.freq <- TrueNumAll / sum(TrueNumAll)
	detail$call.rate <- TrueNum / TrueNumAll

	sens <- diag(confusion) / TrueNum
	spec <- 1 - (PredNum[1:m] - diag(confusion)) / (2*cnt.call - TrueNum)
	detail$accuracy <- (sens*TrueNum + spec*(2*cnt.call - TrueNum)) / (2*cnt.call)
	detail$sensitivity <- sens
	detail$specificity <- spec
	detail$ppv <- diag(confusion) / rowSums(confusion)[1:m]
	detail$npv <- 1 - (TrueNum - diag(confusion)) / (2*n - rowSums(confusion)[1:m])

	detail$call.rate[!is.finite(detail$call.rate)] <- 0
	detail[detail$call.rate<=0,
		c("sensitivity", "specificity", "ppv", "npv", "accuracy")] <- NaN

	# get miscall
	rv <- confusion; diag(rv) <- 0
	m.max <- apply(rv, 2, max); m.idx <- apply(rv, 2, which.max)
	s <- names(PredNum)[m.idx]; s[m.max<=0] <- NA
	p <- m.max / apply(rv, 2, sum)
	detail <- cbind(detail, miscall=s, miscall.prop=p, stringsAsFactors=FALSE)
	rownames(detail) <- NULL

	# output
	rv <- list(overall=overall, confusion=confusion, detail=detail)
	if (output.individual)
	{
		rv$individual <- data.frame(sample.id=samp.id,
			true.hla=ind.truehla, pred.hla=ind.predhla,
			accuracy=acc.array, stringsAsFactors=FALSE)
	}
	return(rv)
}


#######################################################################
# Return a sample list satisfying the filter conditions
#

hlaSampleAllele <- function(TrueHLA, allele.limit = NULL, max.resolution="")
{
	# check
	stopifnot(inherits(TrueHLA, "hlaAlleleClass"))
	stopifnot(is.null(allele.limit) | is.vector(allele.limit) |
		inherits(allele.limit, "hlaAttrBagClass") |
		inherits(allele.limit, "hlaAttrBagObj"))
	stopifnot(max.resolution %in% c("2-digit", "4-digit", "6-digit",
		"8-digit", "allele", "protein", "2", "4", "6", "8", "full", ""))

	# init
	flag <- !is.na(TrueHLA$value$allele1) & !is.na(TrueHLA$value$allele2)
	ts1 <- TrueHLA$value$allele1[flag]
	ts2 <- TrueHLA$value$allele2[flag]

	# max resolution
	if (!(max.resolution %in% c("full", "")))
	{
		ts1 <- hlaAlleleDigit(ts1, max.resolution)
		ts2 <- hlaAlleleDigit(ts2, max.resolution)
	}

	# allele limitation
	if (!is.null(allele.limit))
	{
		if (inherits(allele.limit, "hlaAttrBagClass") |
			inherits(allele.limit, "hlaAttrBagObj"))
		{
			allele <- levels(factor(allele.limit$hla.allele))
		} else {
			allele <- levels(factor(allele.limit))
		}
		if (!(max.resolution %in% c("full", "")))
		{
			allele <- hlaAlleleDigit(allele, max.resolution)
		}
		flag[flag] <- (ts1 %in% allele) & (ts2 %in% allele)
	}

	# return
	return(TrueHLA$value$sample.id[flag])
}


#######################################################################
# Divide the list of HLA types to the training and validation sets
#

hlaSplitAllele <- function(HLA, train.prop=0.5)
{
	# check
	stopifnot(inherits(HLA, "hlaAlleleClass"))

	train.set <- NULL
	H <- HLA
	while (dim(H$value)[1] > 0)
	{
		v <- summary(H, show=FALSE)
		if (dim(v)[1] > 1)
		{
			v <- v[order(v[, "count"]), ]
		}

		allele <- rownames(v)[1]
		samp.id <- H$value$sample.id[ H$value$allele1==allele |
			H$value$allele2==allele ]
		samp.id <- as.character(samp.id)

		n.train <- ceiling(length(samp.id) * train.prop)
		train.sampid <- sample(samp.id, n.train)
		train.set <- c(train.set, train.sampid)

		H <- hlaAlleleSubset(H, samp.sel=
			match(setdiff(H$value$sample.id, samp.id), H$value$sample.id))
	}

	train.set <- train.set[order(train.set)]
	list(
		training = hlaAlleleSubset(HLA,
			samp.sel = match(train.set, HLA$value$sample.id)),
		validation = hlaAlleleSubset(HLA,
			samp.sel = match(setdiff(HLA$value$sample.id, train.set),
				HLA$value$sample.id)
			)
	)
}


#######################################################################
# To select SNPs in the flanking region of a specified HLA locus
#

hlaFlankingSNP <- function(snp.id, position, hla.id, flank.bp=500*1000,
	assembly=c("auto", "hg18", "hg19", "unknown"))
{
	# init
	assembly <- match.arg(assembly)
	HLAInfo <- hlaLociInfo(assembly)
	ID <- HLAInfo$loci[-length(HLAInfo$loci)]

	# check
	stopifnot(length(snp.id) == length(position))
	stopifnot(is.character(hla.id))
	stopifnot(length(hla.id) == 1)
	if (!(hla.id %in% ID))
		stop(paste("`hla.id' should be one of", paste(ID, collapse=",")))

	pos.start <- HLAInfo$pos.HLA.start[hla.id] - flank.bp
	pos.end <- HLAInfo$pos.HLA.end[hla.id] + flank.bp

	if (is.finite(pos.start) & is.finite(pos.end))
	{
		flag <- (pos.start <= position) & (position <= pos.end)
		return(snp.id[flag])
	} else {
		stop("The position information is not available!")
	}
}


#######################################################################
# Summary a "hlaAlleleClass" object
#

summary.hlaAlleleClass <- function(object, show=TRUE, ...)
{
	# check
	stopifnot(inherits(object, "hlaAlleleClass"))
	hla <- object

	HUA <- hlaUniqueAllele(c(hla$value$allele1, hla$value$allele2))
	HLA <- factor(match(c(hla$value$allele1, hla$value$allele2), HUA))
	levels(HLA) <- HUA	
	count <- table(HLA)
	freq <- prop.table(count)
	rv <- cbind(count=count, freq=freq)

	# get the number of unique genotypes
	m <- data.frame(a1 = hla$value$allele1,
		a2 = hla$value$allele2, stringsAsFactors=FALSE)
	lst <- apply(m, 1, FUN=function(x) {
		if (!is.na(x[1]) & !is.na(x[2]))
		{
			if (x[1] <= x[2])
				paste(x[1], x[2], sep="/")
			else
				paste(x[2], x[1], sep="/")
		} else
			NA
	})
	unique.n.geno <- nlevels(factor(lst))

	if (show)
	{
		cat("Gene: ", .hla_gene_name_string(hla$locus), "\n", sep="")
		cat(sprintf("Range: [%dbp, %dbp]", hla$pos.start, hla$pos.end))
		if (!is.null(hla$assembly))
			cat(" on ", hla$assembly, "\n", sep="")
		else
			cat("\n")
		cat(sprintf("# of samples: %d\n", dim(hla$value)[1]))
		cat(sprintf("# of unique HLA alleles: %d\n", length(count)))
		cat(sprintf("# of unique HLA genotypes: %d\n", unique.n.geno))

		p <- hla$value$prob
		if (!is.null(p))
		{
			z <- table(cut(p, breaks=c(0, 0.25, 0.5, 0.75, 1),
				right=FALSE, include.lowest=TRUE))
			z[] <- sprintf("%d (%0.1f%%)", z, prop.table(z)*100)
			names(attr(z, "dimnames")) <- "Posterior probability:"
			print(z)
		}
	}

	# return
	return(invisible(rv))
}




##########################################################################
##########################################################################
#
# Attribute Bagging method -- HIBAG algorithm
#

##########################################################################
# To fit an attribute bagging model for predicting
#

hlaAttrBagging <- function(hla, snp, nclassifier=100,
	mtry=c("sqrt", "all", "one"), prune=TRUE, rm.na=TRUE,
	verbose=TRUE, verbose.detail=FALSE)
{
	# check
	stopifnot(inherits(hla, "hlaAlleleClass"))
	stopifnot(inherits(snp, "hlaSNPGenoClass"))
	stopifnot(is.character(mtry) | is.numeric(mtry))
	stopifnot(is.logical(verbose))
	stopifnot(is.logical(verbose.detail))
	if (verbose.detail) verbose <- TRUE

	# get the common samples
	samp.id <- intersect(hla$value$sample.id, snp$sample.id)

	# hla types
	samp.flag <- match(samp.id, hla$value$sample.id)
	hla.allele1 <- hla$value$allele1[samp.flag]
	hla.allele2 <- hla$value$allele2[samp.flag]
	if (rm.na)
	{
		if (any(is.na(c(hla.allele1, hla.allele2))))
		{
			warning("There are missing HLA alleles, and the corresponding samples have been removed.")
			flag <- is.na(hla.allele1) | is.na(hla.allele2)
			samp.id <- setdiff(samp.id, hla$value$sample.id[samp.flag[flag]])
			samp.flag <- match(samp.id, hla$value$sample.id)
			hla.allele1 <- hla$value$allele1[samp.flag]
			hla.allele2 <- hla$value$allele2[samp.flag]
		}
	} else {
		if (any(is.na(c(hla.allele1, hla.allele2))))
		{
			stop("There are missing HLA alleles!")
		}
	}

	# SNP genotypes
	samp.flag <- match(samp.id, snp$sample.id)
	snp.geno <- snp$genotype[, samp.flag]
	storage.mode(snp.geno) <- "integer"

	tmp.snp.id <- snp$snp.id
	tmp.snp.position <- snp$snp.position
	tmp.snp.allele <- snp$snp.allele

	# remove mono-SNPs
	snpsel <- rowMeans(snp.geno, na.rm=TRUE)
	snpsel[!is.finite(snpsel)] <- 0
	snpsel <- (0 < snpsel) & (snpsel < 2)
	if (sum(!snpsel) > 0)
	{
		snp.geno <- snp.geno[snpsel, ]
		if (verbose)
		{
			a <- sum(!snpsel)
			cat(sprintf("%s monomorphic SNP%s ha%s been removed.\n",
				a, if (a>1) "s" else "", if (a>1) "ve" else "s"))
		}
		tmp.snp.id <- tmp.snp.id[snpsel]
		tmp.snp.position <- tmp.snp.position[snpsel]
		tmp.snp.allele <- tmp.snp.allele[snpsel]
	}

	if (length(samp.id) <= 0)
		stop("There is no common sample between 'hla' and 'snp'.")
	if (length(dim(snp.geno)[1]) <= 0)
		stop("There is no valid SNP markers.")


	###################################################################
	# initialize ...

	n.snp <- dim(snp.geno)[1]      # Num. of SNPs
	n.samp <- dim(snp.geno)[2]     # Num. of samples
	HUA <- hlaUniqueAllele(c(hla.allele1, hla.allele2))
	H <- factor(match(c(hla.allele1, hla.allele2), HUA))
	levels(H) <- HUA
	n.hla <- nlevels(H)
	H1 <- as.integer(H[1:n.samp]) - as.integer(1)
	H2 <- as.integer(H[(n.samp+1):(2*n.samp)]) - as.integer(1)

	# create an attribute bagging object
	rv <- .C("HIBAG_Training", n.snp, n.samp, snp.geno, n.hla,
		H1, H2, AB=integer(1), err=integer(1),
		NAOK = TRUE, PACKAGE = "HIBAG")
	if (rv$err != 0) stop(hlaErrMsg())
	ABmodel <- rv$AB

	# number of variables randomly sampled as candidates at each split
	mtry <- mtry[1]
	if (is.character(mtry))
	{
		if (mtry == "sqrt")
		{
			mtry <- ceiling(sqrt(n.snp))
		} else if (mtry == "all")
		{
			mtry <- n.snp
		} else if (mtry == "one")
		{
			mtry <- as.integer(1)
		} else {
			stop("Invalid mtry!")
		}
	} else if (is.numeric(mtry))
	{
		if (is.finite(mtry))
		{
			if ((0 < mtry) & (mtry < 1)) mtry <- n.snp*mtry
			mtry <- ceiling(mtry)
			if (mtry > n.snp) mtry <- n.snp
		} else {
			mtry <- ceiling(sqrt(n.snp))
		}
	} else {
		stop("Invalid mtry value!")
	}
	if (mtry <= 0) mtry <- as.integer(1)

	if (verbose)
	{
		cat("Build a HIBAG model with", nclassifier, "individual classifiers:\n")
		cat("# of SNPs randomly sampled as candidates for each selection: ",
			mtry, "\n", sep="")
		cat("# of SNPs: ", n.snp, ", # of samples: ", n.samp, "\n", sep="")
		cat("# of unique HLA alleles: ", n.hla, "\n", sep="")
	}


	###################################################################
	# training ...
	# add new individual classifers
	rv <- .C("HIBAG_NewClassifiers", ABmodel, as.integer(nclassifier),
		as.integer(mtry), as.logical(prune), verbose, verbose.detail,
		err=integer(1), NAOK = TRUE, PACKAGE = "HIBAG")
	if (rv$err != 0) stop(hlaErrMsg())

	# output
	rv <- list(n.samp = n.samp, n.snp = n.snp, sample.id = samp.id,
		snp.id = tmp.snp.id, snp.position = tmp.snp.position,
		snp.allele = tmp.snp.allele,
		snp.allele.freq = 0.5*rowMeans(snp.geno, na.rm=TRUE),
		hla.locus = hla$locus, hla.allele = levels(H),
		hla.freq = prop.table(table(H)),
		assembly = as.character(snp$assembly)[1],
		model = ABmodel,
		appendix = list())
	if (is.na(rv$assembly)) rv$assembly <- "unknown"

	class(rv) <- "hlaAttrBagClass"
	return(rv)
}


##########################################################################
# To fit an attribute bagging model for predicting
#

hlaParallelAttrBagging <- function(cl, hla, snp, auto.save="",
	nclassifier=100, mtry=c("sqrt", "all", "one"), prune=TRUE, rm.na=TRUE,
	stop.cluster=FALSE, verbose=TRUE)
{
	# check
	stopifnot(is.null(cl) | inherits(cl, "cluster"))
	stopifnot(inherits(hla, "hlaAlleleClass"))
	stopifnot(inherits(snp, "hlaSNPGenoClass"))

	stopifnot(is.character(auto.save) & (length(auto.save)==1))
	stopifnot(is.numeric(nclassifier))
	stopifnot(is.character(mtry) | is.numeric(mtry))
	stopifnot(is.logical(prune))
	stopifnot(is.logical(rm.na))
	stopifnot(is.logical(stop.cluster))
	stopifnot(is.logical(verbose))

	if (!is.null(cl))
	{
	    if (!require(parallel, warn.conflicts=FALSE))
			stop("The `parallel' package should be installed.")
	}

	if (verbose)
	{
		if (!is.null(cl))
		{
			cat(sprintf(
				"Build a HIBAG model of %d individual classifier%s in parallel with %d node%s:\n",
				nclassifier, if (nclassifier>1) "s" else "",
				length(cl), if (length(cl)>1) "s" else ""
			))
		} else {
			cat(sprintf(
				"Build a HIBAG model of %d individual classifier%s:\n",
				nclassifier, if (nclassifier>1) "s" else ""
			))
		}
		if (auto.save != "")
			cat("The model is autosaved in '", auto.save, "'.\n", sep="")
	}

	# set random number
	if (!is.null(cl))
	{
		RNGkind("L'Ecuyer-CMRG")
		rand <- .Random.seed
		parallel::clusterSetRNGStream(cl)
	}

	ans <- local({
		total <- 0

		.DynamicClusterCall(cl,
			fun = function(job, hla, snp, mtry, prune, rm.na)
			{
				library(HIBAG)
				model <- hlaAttrBagging(hla=hla, snp=snp, nclassifier=1,
					mtry=mtry, prune=prune, rm.na=rm.na,
					verbose=FALSE, verbose.detail=FALSE)
				mobj <- hlaModelToObj(model)
				hlaClose(model)
				return(mobj)
			},
			combine.fun = function(obj1, obj2)
			{
				if (is.null(obj1))
					mobj <- obj2
				else if (is.null(obj2))
					mobj <- obj1
				else
					mobj <- hlaCombineModelObj(obj1, obj2)
				if (auto.save != "")
					save(mobj, file=auto.save)
				if (verbose & !is.null(mobj))
				{
					z <- summary(mobj, show=FALSE)
					cat(sprintf("  --  average out-of-bag accuracy: %0.2f%%, sd: %0.2f%%, min: %0.2f%%, max: %0.2f%%\n",
						z$info["accuracy", "Mean"], z$info["accuracy", "SD"],
						z$info["accuracy", "Min"], z$info["accuracy", "Max"]))
				}
				mobj
			},
			msg.fn = function(job, obj)
			{
				if (verbose)
				{
					z <- summary(obj, show=FALSE)
					total <<- total + 1
					cat(date(), sprintf(
						", %4d, job %3d, # of SNPs: %g, # of haplotypes: %g, accuracy: %0.1f%%\n",
						total, as.integer(job), z$info["num.snp", "Mean"],
						z$info["num.haplo", "Mean"],
						z$info["accuracy", "Mean"]), sep="")
				}
			},
			n = nclassifier, stop.cluster = stop.cluster,
			hla=hla, snp=snp, mtry=mtry, prune=prune, rm.na=rm.na
		)
	})

	if (!is.null(cl) & !stop.cluster)
	{
		parallel::nextRNGStream(rand)
		parallel::nextRNGSubStream(rand)
	}

	# return
	if (auto.save == "")
		return(hlaModelFromObj(ans))
	else
		return(invisible())
}


##########################################################################
# To fit an attribute bagging model for predicting
#

hlaClose <- function(model)
{
	# check
	stopifnot(inherits(model, "hlaAttrBagClass"))

	# class handler
	rv <- .C("HIBAG_Close", model$model, err=integer(1),
		NAOK = TRUE, PACKAGE = "HIBAG")
	if (rv$err != 0) stop(hlaErrMsg())

	# output
	return(invisible(NULL))
}


#######################################################################
# Check missing SNP predictors
#

hlaCheckSNPs <- function(model, object,
	match.type=c("RefSNP+Position", "RefSNP", "Position"), verbose=TRUE)
{
	# check
	stopifnot(inherits(model, "hlaAttrBagClass") |
		inherits(model, "hlaAttrBagObj"))
	stopifnot((is.vector(object) & is.character(object)) |
		inherits(object, "hlaSNPGenoClass"))
	match.type <- match.arg(match.type)
	stopifnot(is.logical(verbose))

	# initialize
	if (inherits(model, "hlaAttrBagClass"))
		model <- hlaModelToObj(model)

	# show information
	if (verbose)
	{
		cat("The HIBAG model:\n")
		cat(sprintf("\tThere are %d SNP predictors in total.\n",
			length(model$snp.id)))
		cat(sprintf("\tThere are %d individual classifiers.\n",
			length(model$classifiers)))
	}

	if (is.vector(object))
	{
		target.snp <- as.character(object)
		src.snp <- hlaSNPID(model, match.type)
	} else {
		target.snp <- hlaSNPID(object, match.type)
		src.snp <- hlaSNPID(model, match.type)
	}

	NumOfSNP <- integer(length(model$classifiers))
	NumOfValidSNP <- integer(length(model$classifiers))

	# enumerate each classifier
	for (i in 1:length(model$classifiers))
	{
		v <- model$classifiers[[i]]
		flag <- src.snp[v$snpidx] %in% target.snp
		NumOfSNP[i] <- length(v$snpidx)
		NumOfValidSNP[i] <- sum(flag)
	}

	rv <- data.frame(NumOfValidSNP = NumOfValidSNP, NumOfSNP = NumOfSNP,
		fraction = NumOfValidSNP/NumOfSNP)

	if (verbose)
	{
		cat("Summarize the missing fractions of SNP predictors per classifier:\n")
		print(summary(1 - rv$fraction))
	}

	# output
	invisible(rv)
}


#######################################################################
# Predict HLA types from unphased SNP data
#

predict.hlaAttrBagClass <- function(object, snp, cl=NULL,
	type=c("response", "prob", "response+prob"), vote=c("prob", "majority"),
	allele.check=TRUE, match.type=c("RefSNP+Position", "RefSNP", "Position"),
	same.strand=FALSE, verbose=TRUE, ...)
{
	# check
	stopifnot(inherits(object, "hlaAttrBagClass"))
	stopifnot(is.null(cl) | inherits(cl, "cluster"))
	stopifnot(is.logical(allele.check))
	stopifnot(is.logical(same.strand))
	stopifnot(is.logical(verbose))

	type <- match.arg(type)
	vote <- match.arg(vote)
	match.type <- match.arg(match.type)
	vote_method <- match(vote, c("prob", "majority"))
	if (!is.null(cl))
	{
	    if (!require(parallel, warn.conflicts=FALSE))
			stop("The `parallel' package should be installed.")
		if (length(cl) <= 1) cl <- NULL
	}

	# if warning
	if (!is.null(object$appendix$warning))
	{
		message(object$appendix$warning)
		warning(object$appendix$warning)
	}

	if (verbose)
	{
		# call, get the number of classifiers
		rv <- .C("HIBAG_GetNumClassifiers", object$model, CNum = integer(1),
			err=integer(1), NAOK=TRUE, PACKAGE="HIBAG")
		if (rv$err != 0) stop(hlaErrMsg())

		if (rv$CNum > 1) { s <- "s" } else { s <- "" }
		cat(sprintf(
			"HIBAG model: %d individual classifier%s, %d SNPs, %d unique HLA alleles.\n",
			rv$CNum, s, length(object$snp.id), length(object$hla.allele)))

		if (vote_method == 1)
			cat("Predicting based on the averaged posterior probabilities from all individual classifiers.\n")
		else
			cat("Predicting by voting from all individual classifiers.\n")

		if (!is.null(cl))
		{
			cat(sprintf(
				"Run in parallel with %d computing node%s.\n",
				length(cl), if (length(cl)>1) "s" else ""
			))
		}
	}

	if (!inherits(snp, "hlaSNPGenoClass"))
	{
		# it should be a vector or a matrix
		stopifnot(is.numeric(snp))
		stopifnot(is.vector(snp) | is.matrix(snp))

		if (is.vector(snp))
		{
			stopifnot(length(snp) == object$n.snp)
			snp <- matrix(snp, ncol=1)
		} else {
			stopifnot(nrow(snp) == object$n.snp)
		}
		geno.sampid <- 1:ncol(snp)
		assembly <- "auto-silent"

	} else {

		# a 'hlaSNPGenoClass' object

		##################################################
		# check assembly first

		model.assembly <- as.character(object$assembly)[1]
		if (is.na(model.assembly))
			model.assembly <- "unknown"
		geno.assembly <- as.character(snp$assembly)[1]
		if (is.na(geno.assembly))
			geno.assembly <- "unknown"
		refstr <- sprintf("Model assembly: %s, SNP assembly: %s",
			model.assembly, geno.assembly)
		if (verbose)
			cat(refstr, "\n", sep="")

		if (model.assembly != geno.assembly)
		{
			if (any(c(model.assembly, geno.assembly) %in% "unknown"))
			{
				if (verbose)
					message("The human genome references might not match!")
				if (geno.assembly == "unknown")
					assembly <- model.assembly
				else
					assembly <- geno.assembly
			} else {
				if (verbose)
					message("The human genome references do not match!")
				warning("The human genome references do not match! ", refstr, ".")
				assembly <- model.assembly
			}
		} else {
			if (model.assembly != "unknown")
				assembly <- model.assembly
			else
				assembly <- "auto"
		}

		##################################################

		geno.sampid <- snp$sample.id
		obj.id <- hlaSNPID(object, match.type)
		geno.id <- hlaSNPID(snp, match.type)

		# flag boolean
		flag <- FALSE
		if (length(obj.id) == length(geno.id))
		{
			if (all(obj.id == geno.id))
				flag <- TRUE
		}

		# check and switch A/B alleles
		if (flag)
		{
			if (allele.check)
			{
				snp <- hlaGenoSwitchStrand(snp, object,
					match.type, same.strand, verbose)$genotype
			} else {
				snp <- snp$genotype
			}
		} else {

			# snp selection
			snp.sel <- match(obj.id, geno.id)
			snp.allele <- snp$snp.allele
			snp.allele[is.na(snp.allele)] <- ""

			# tmp variable
			tmp <- list(genotype = snp$genotype[snp.sel,],
				sample.id = snp$sample.id,
				snp.id = object$snp.id, snp.position = object$snp.position,
				snp.allele = snp.allele[snp.sel],
				assembly = snp$assembly)

			flag <- is.na(tmp$snp.allele)
			tmp$snp.allele[flag] <- object$snp.allele[
				match(tmp$snp.id[flag], object$snp.id)]

			if (is.vector(tmp$genotype))
				tmp$genotype <- matrix(tmp$genotype, ncol=1)

			class(tmp) <- "hlaSNPGenoClass"

			# the total number of missing snp
			missing.cnt <- sum(flag)

			# verbose
			if (verbose)
			{
				if (missing.cnt > 0)
				{
					cat(sprintf("There %s %d missing SNP%s (%0.1f%%).\n",
						if (missing.cnt > 1) "are" else "is",
						missing.cnt,
						if (missing.cnt > 1) "s" else "",
						100*missing.cnt/length(obj.id)))
				}
			}

			# try alternative matching if possible
			if (match.type == "RefSNP+Position")
			{
				# match by RefSNP IDs
				s1 <- hlaSNPID(object, "RefSNP")
				s2 <- hlaSNPID(snp, "RefSNP")
				mcnt.id <- length(s1) - length(intersect(s1, s2))

				# match by positions
				s1 <- hlaSNPID(object, "Position")
				s2 <- hlaSNPID(snp, "Position")
				mcnt.pos <- length(s1) - length(intersect(s1, s2))

				if (((mcnt.id < missing.cnt) | (mcnt.pos < missing.cnt)) & verbose)
				{
					message(
						"Hint:\n",
						"The current SNP matching requires both RefSNP IDs and positions, and lower missing fraction(s) could be gained:\n",
						sprintf("\t%0.1f%% by matching RefSNP IDs only, call 'predict(..., match.type=\"RefSNP\")'\n",
							100*mcnt.id/length(s1)),
						sprintf("\t%0.1f%% by matching positions only, call 'predict(..., match.type=\"Position\")'\n",
							100*mcnt.pos/length(s1)),
						"Any concern about SNP mismatching should be emailed to the genotyping platform provider.")

					if (!is.null(object$appendix$platform))
						message("The supported platform(s): ", object$appendix$platform)
				}
			}

			if (missing.cnt == length(obj.id))
			{
				stop("There is no overlapping of SNPs!")
			} else if (missing.cnt > 0.5*length(obj.id))
			{
				warning("More than 50% of SNPs are missing!")
			}

			# switch
			if (allele.check)
			{
				snp <- hlaGenoSwitchStrand(tmp, object, match.type,
					same.strand, verbose)$genotype
			} else {
				snp <- snp$genotype
			}
		}
	}

	# check
	if (dim(snp)[1] != object$n.snp)
		stop("Internal error! Please contact the author.")

	# initialize ...
	n.samp <- dim(snp)[2]
	n.hla <- length(object$hla.allele)
	if (verbose)
		cat(sprintf("The number of samples: %d.\n", n.samp))

	# parallel units
	if (is.null(cl))
	{
		# to predict HLA types
		if (type %in% c("response", "response+prob"))
		{
			# the best-guess prediction

			if (type == "response")
			{
				rv <- .C("HIBAG_Predict_Resp", object$model, as.integer(snp),
					n.samp, as.integer(vote_method), as.logical(verbose),
					H1=integer(n.samp), H2=integer(n.samp), prob=double(n.samp),
					err=integer(1), NAOK=TRUE, PACKAGE="HIBAG")
			} else {
				rv <- .C("HIBAG_Predict_Resp_Prob", object$model, as.integer(snp),
					n.samp, as.integer(vote_method), as.logical(verbose),
					H1=integer(n.samp), H2=integer(n.samp), prob=double(n.samp), 
					postprob=matrix(NaN, nrow=n.hla*(n.hla+1)/2, ncol=n.samp),
					err=integer(1), NAOK=TRUE, PACKAGE="HIBAG")
			}
			if (rv$err != 0) stop(hlaErrMsg())

			res <- hlaAllele(geno.sampid,
				H1=object$hla.allele[rv$H1 + 1], H2=object$hla.allele[rv$H2 + 1],
				locus = object$hla.locus, prob = rv$prob, na.rm = FALSE,
				assembly = assembly)
			if (!is.null(rv$postprob))
			{
				res$postprob <- rv$postprob
				colnames(res$postprob) <- geno.sampid
				m <- outer(object$hla.allele, object$hla.allele,
					function(x, y) paste(x, y, sep="/"))
				rownames(res$postprob) <- m[lower.tri(m, diag=TRUE)]
			}

			NA.cnt <- sum(is.na(res$value$allele1) | is.na(res$value$allele2))

		} else {
			# all probabilites

			rv <- .C("HIBAG_Predict_Prob", object$model, as.integer(snp),
				n.samp, as.integer(vote_method), as.logical(verbose),
				prob=matrix(NaN, nrow=n.hla*(n.hla+1)/2, ncol=n.samp),
				err=integer(1), NAOK=TRUE, PACKAGE="HIBAG")
			if (rv$err != 0) stop(hlaErrMsg())

			res <- rv$prob
			colnames(res) <- geno.sampid
			m <- outer(object$hla.allele, object$hla.allele,
				function(x, y) paste(x, y, sep="/"))
			rownames(res) <- m[lower.tri(m, diag=TRUE)]

			NA.cnt <- sum(colSums(res) <= 0)
		}
	} else {

		# in parallel
		rv <- parallel::clusterApply(cl=cl, parallel::splitIndices(n.samp, length(cl)),
			fun = function(idx, mobj, snp, type, vote)
			{
				if (length(idx) > 0)
				{
					library(HIBAG)
					m <- hlaModelFromObj(mobj)
					pd <- predict(m, snp[,idx], type=type, vote=vote, verbose=FALSE)
					hlaClose(m)
					return(pd)
				} else {
					return(NULL)
				}
			}, mobj=hlaModelToObj(object), snp=snp, type=type, vote=vote
		)

		if (type %in% c("response", "response+prob"))
		{
			res <- rv[[1]]
			for (i in 2:length(rv))
			{
				if (!is.null(rv[[i]]))
					res <- hlaCombineAllele(res, rv[[i]])
			}
			res$value$sample.id <- geno.sampid
			if (!is.null(res$postprob))
				colnames(res$postprob) <- geno.sampid
			NA.cnt <- sum(is.na(res$value$allele1) | is.na(res$value$allele2))
		} else {
			res <- rv[[1]]
			for (i in 2:length(rv))
			{
				if (!is.null(rv[[i]]))
					res <- cbind(res, rv[[i]])
			}
			colnames(res) <- geno.sampid
			NA.cnt <- sum(colSums(res) <= 0)
		}
	} 

	if (NA.cnt > 0)
	{
		if (NA.cnt > 1) s <- "s" else s <- ""
		warning(sprintf(
			"No prediction output%s for %d individual%s (possibly due to missing SNPs.)",
			s, NA.cnt, s))
	}

	# return
	return(res)
}


#######################################################################
# Merge predictions by voting
#

hlaPredMerge <- function(..., weight=NULL, equivalence=NULL)
{
	# check "..."
	pdlist <- list(...)
	if (length(pdlist) <= 0)
		stop("No object is passed to 'hlaPredMerge'.")
	for (i in 1:length(pdlist))
	{
		if (!inherits(pdlist[[i]], "hlaAlleleClass"))
			stop("The object(s) passed to 'hlaPredMerge' should be 'hlaAlleleClass'.")
		if (is.null(pdlist[[i]]$postprob))
		{
			stop("The object(s) passed to 'hlaPredMerge' should have a field of 'postprob',",
				" returned by 'predict(..., type=\"response+prob\", vote=\"majority\")'.")
		}
	}

	# check equivalence
	stopifnot(is.null(equivalence) | is.data.frame(equivalence))
	if (is.data.frame(equivalence))
	{
		if (ncol(equivalence) != 2)
		{
			stop("'equivalence' should have two columns: ",
				"the first for new equivalent alleles, and the second for the ",
				"alleles possibly existed in the object(s) passed to 'hlaPredMerge'.")
		}
	}

	# check locus and sample.id
	samp.id <- pdlist[[1]]$value$sample.id
	locus <- pdlist[[1]]$locus
	for (i in 1:length(pdlist))
	{
		if (!identical(samp.id, pdlist[[i]]$value$sample.id))
			stop("The sample IDs should be the same.")
		if (!identical(locus, pdlist[[i]]$locus))
			stop("The locus should be the same.")
	}

	# check weight
	if (!is.null(weight))
	{
		stopifnot(is.numeric(weight) & is.vector(weight))
		if (length(pdlist) != length(weight))
			stop("Invalid 'weight'.")
		weight <- abs(weight)
		weight <- weight / sum(weight)
	} else {
		weight <- rep(1/length(pdlist), length(pdlist))
	}

	#############################################################
	# replace function
	replace <- function(allele)
	{
		if (!is.null(equivalence))
		{
			i <- match(allele, equivalence[, 2])
			flag <- !is.na(i)
			i <- i[flag]
			allele[flag] <- equivalence[i, 1]
		}
		allele
	}


	# all different alleles
	hla.allele <- NULL
	for (i in 1:length(pdlist))
	{
		h <- unique(unlist(strsplit(rownames(pdlist[[i]]$postprob), "/")))
		hla.allele <- unique(c(hla.allele, replace(h)))
	}
	hla.allele <- hlaUniqueAllele(hla.allele)
	n.hla <- length(hla.allele)
	n.samp <- length(samp.id)

	prob <- matrix(0.0, nrow=n.hla*(n.hla+1)/2, ncol=n.samp)
	m <- outer(hla.allele, hla.allele, function(x, y) paste(x, y, sep="/"))
	m <- m[lower.tri(m, diag=TRUE)]

	# for-loop
	for (i in 1:length(pdlist))
	{
		p <- pdlist[[i]]$postprob
		h <- replace(unlist(strsplit(rownames(p), "/")))

		h1 <- h[seq(1, length(h), 2)]; h2 <- h[seq(2, length(h), 2)]
		j1 <- match(paste(h1, h2, sep="/"), m)
		j2 <- match(paste(h2, h1, sep="/"), m)
		j1[is.na(j1)] <- j2[is.na(j1)]

		# check
		stopifnot(!any(is.na(j1)))

		p <- p * weight[i]
		for (j in seq_len(length(j1)))
			prob[j1[j], ] <- prob[j1[j], ] + p[j, ]
	}
	colnames(prob) <- samp.id
	rownames(prob) <- m

	pb <- apply(prob, 2, max)
	pt <- unlist(strsplit(m[apply(prob, 2, which.max)], "/"))
	assembly <- pdlist[[1]]$assembly
	if (is.null(assembly)) assembly <- "auto"

	rv <- hlaAllele(samp.id,
		H1 = pt[seq(2, length(pt), 2)],
		H2 = pt[seq(1, length(pt), 2)],
		locus = locus,
		locus.pos.start = pdlist[[1]]$pos.start,
		locus.pos.end = pdlist[[1]]$pos.end,
		prob = pb, na.rm = FALSE,
		assembly = assembly)
	rv$postprob <- prob
	rv
}




#######################################################################
# summarize the "hlaAttrBagClass" object
#

summary.hlaAttrBagClass <- function(object, show=TRUE, ...)
{
	obj <- hlaModelToObj(object)
	summary(obj, show=show)
}


#######################################################################
# Save the parameters in a model of attribute bagging
#

hlaModelToObj <- function(model)
{
	# check
	stopifnot(inherits(model, "hlaAttrBagClass"))

	# call, get the number of classifiers
	rv <- .C("HIBAG_GetNumClassifiers", model$model, CNum = integer(1),
		err=integer(1), NAOK=TRUE, PACKAGE="HIBAG")
	if (rv$err != 0) stop(hlaErrMsg())

	# for each tree
	res <- vector("list", rv$CNum)
	for (i in 1:length(res))
	{
		# call, get the number of haplotypes
		rv <- .C("HIBAG_Idv_GetNumHaplo", model$model, as.integer(i),
			NumHaplo = integer(1), NumSNP = integer(1),
			err=integer(1), NAOK=TRUE, PACKAGE="HIBAG")
		if (rv$err != 0) stop(hlaErrMsg())

		# call, get freq. and haplotypes
		rv <- .C("HIBAG_Classifier_GetHaplos", model$model, as.integer(i),
			freq=double(rv$NumHaplo), hla=integer(rv$NumHaplo),
			haplo=character(rv$NumHaplo), snpidx = integer(rv$NumSNP),
			samp.num = integer(model$n.samp), acc = double(1),
			err=integer(1), NAOK=TRUE, PACKAGE="HIBAG")
		if (rv$err != 0) stop(hlaErrMsg())

		res[[i]] <- list(
			samp.num = rv$samp.num,
			haplos = data.frame(freq = rv$freq, hla = model$hla.allele[rv$hla],
				haplo = rv$haplo, stringsAsFactors=FALSE),
			snpidx = rv$snpidx,
			outofbag.acc = rv$acc)
	}

	rv <- list(n.samp = model$n.samp, n.snp = model$n.snp,
		sample.id = model$sample.id, snp.id = model$snp.id,
		snp.position = model$snp.position, snp.allele = model$snp.allele,
		snp.allele.freq = model$snp.allele.freq,
		hla.locus = model$hla.locus,
		hla.allele = model$hla.allele, hla.freq = model$hla.freq,
		assembly = model$assembly,
		classifiers = res,
		appendix <- model$appendix)
	class(rv) <- "hlaAttrBagObj"
	return(rv)
}


#######################################################################
# To combine two model objects of attribute bagging
#

hlaCombineModelObj <- function(obj1, obj2)
{
	# check
	stopifnot(inherits(obj1, "hlaAttrBagObj"))
	stopifnot(inherits(obj2, "hlaAttrBagObj"))
	stopifnot(identical(obj1$hla.locus, obj2$hla.locus))
	stopifnot(identical(obj1$snp.id, obj2$snp.id))
	stopifnot(identical(obj1$hla.allele, obj2$hla.allele))
	stopifnot(identical(obj1$assembly, obj2$assembly))

	samp.id <- unique(c(obj1$sample.id, obj2$sample.id))
	if (!is.null(obj1$appendix) | !is.null(obj2$appendix))
	{
		appendix <- list(
			platform =
				unique(c(obj1$appendix$platform, obj2$appendix$platform)),
			information =
				unique(c(obj1$appendix$information, obj2$appendix$information)),
			warning =
				unique(c(obj1$appendix$warning, obj2$appendix$warning))
		)
	} else {
		appendix <- NULL
	}

	rv <- list(n.samp = length(samp.id), n.snp = obj1$n.snp,
		sample.id = samp.id, snp.id = obj1$snp.id,
		snp.position = obj1$snp.position, snp.allele = obj1$snp.allele,
		snp.allele.freq = (obj1$snp.allele.freq + obj2$snp.allele.freq)*0.5,
		hla.locus = obj1$hla.locus, hla.allele = obj1$hla.allele,
		hla.freq = (obj1$hla.freq + obj2$hla.freq)*0.5,
		assembly = obj1$assembly,
		classifiers = c(obj1$classifiers, obj2$classifiers),
		appendix = appendix)
	class(rv) <- "hlaAttrBagObj"
	return(rv)
}


#######################################################################
# To get the top n individual classifiers
#

hlaSubModelObj <- function(obj, n)
{
	# check
	stopifnot(inherits(obj, "hlaAttrBagObj"))
	obj$classifiers <- obj$classifiers[1:n]
	return(obj)
}


#######################################################################
# To get a "hlaAttrBagClass" class
#

hlaModelFromObj <- function(obj)
{
	# check
	stopifnot(inherits(obj, "hlaAttrBagObj"))

	# create an attribute bagging object
	rv <- .C("HIBAG_New",
		as.integer(obj$n.samp), as.integer(obj$n.snp), length(obj$hla.allele),
		model = integer(1), err=integer(1), NAOK=TRUE, PACKAGE="HIBAG")
	if (rv$err != 0) stop(hlaErrMsg())
	ABmodel <- rv$model

	# add individual classifiers
	for (tree in obj$classifiers)
	{
		hla <- match(tree$haplos$hla, obj$hla.allele) - 1
		if (any(is.na(hla)))
			stop("Invalid HLA alleles in the individual classifier.")
		if (is.null(tree$samp.num))
			snum <- rep(as.integer(1), obj$n.samp)
		else
			snum <- tree$samp.num
		rv <- .C("HIBAG_NewClassifierHaplo", ABmodel, length(tree$snpidx),
			as.integer(tree$snpidx-1), as.integer(snum), dim(tree$haplos)[1],
			as.double(tree$haplos$freq), as.integer(hla),
			as.character(tree$haplos$haplo), as.double(tree$outofbag.acc),
			err=integer(1), NAOK=TRUE, PACKAGE="HIBAG")
		if (rv$err != 0) stop(hlaErrMsg())
	}

	# output
	rv <- list(n.samp = obj$n.samp, n.snp = obj$n.snp,
		sample.id = obj$sample.id, snp.id = obj$snp.id,
		snp.position = obj$snp.position, snp.allele = obj$snp.allele,
		snp.allele.freq = obj$snp.allele.freq,
		hla.locus = obj$hla.locus, hla.allele = obj$hla.allele,
		hla.freq = obj$hla.freq,
		assembly = as.character(obj$assembly)[1],
		model = ABmodel,
		appendix = obj$appendix)
	if (is.na(rv$assembly)) rv$assembly <- "unknown"

	class(rv) <- "hlaAttrBagClass"
	return(rv)
}


#######################################################################
# summarize the "hlaAttrBagObj" object
#

summary.hlaAttrBagObj <- function(object, show=TRUE, ...)
{
	# check
	stopifnot(inherits(object, "hlaAttrBagObj"))
	obj <- object

	if (show)
	{
		cat("Gene: ", .hla_gene_name_string(obj$hla.locus), "\n", sep="")
		cat("Training dataset:", obj$n.samp, "samples X",
			length(obj$snp.id), "SNPs\n")
		cat("\t# of HLA alleles: ", length(obj$hla.allele), "\n", sep="")
	}

	# summarize ...
	snpset <- NULL
	outofbag.acc <- rep(NaN, length(obj$classifiers))
	numsnp <- rep(NA, length(obj$classifiers))
	numhaplo <- rep(NA, length(obj$classifiers))
	snp.hist <- rep(0, length(obj$snp.id))
	for (i in 1:length(obj$classifiers))
	{
		outofbag.acc[i] <- obj$classifiers[[i]]$outofbag.acc
		numsnp[i] <- length(obj$classifiers[[i]]$snpidx)
		numhaplo[i] <- length(obj$classifiers[[i]]$haplos$hla)
		snp.hist[obj$classifiers[[i]]$snpidx] <- snp.hist[obj$classifiers[[i]]$snpidx] + 1
		snpset <- unique(c(snpset, obj$classifiers[[i]]$snpidx))
	}
	snpset <- snpset[order(snpset)]
	outofbag.acc <- outofbag.acc * 100

	info <- data.frame(
		Mean = c(mean(numsnp), mean(numhaplo), mean(outofbag.acc)),
		SD = c(sd(numsnp), sd(numhaplo), sd(outofbag.acc)),
		Min = c(min(numsnp), min(numhaplo), min(outofbag.acc)),
		Max = c(max(numsnp), max(numhaplo), max(outofbag.acc))
	)
	rownames(info) <- c("num.snp", "num.haplo", "accuracy")

	if (show)
	{
		cat("\t# of individual classifiers: ", length(obj$classifiers), "\n", sep="")
		cat("\ttotal # of SNPs used: ", length(snpset), "\n", sep="")
		cat(sprintf(
			"\taverage # of SNPs in an individual classifier: %0.2f, sd: %0.2f, min: %d, max: %d\n",
			mean(numsnp), sd(numsnp), min(numsnp), max(numsnp)))
		cat(sprintf(
			"\taverage # of haplotypes in an individual classifier: %0.2f, sd: %0.2f, min: %d, max: %d\n",
			mean(numhaplo), sd(numhaplo), min(numhaplo), max(numhaplo)))
		cat(sprintf(
			"\taverage out-of-bag accuracy: %0.2f%%, sd: %0.2f%%, min: %0.2f%%, max: %0.2f%%\n",
			mean(outofbag.acc), sd(outofbag.acc), min(outofbag.acc),
			max(outofbag.acc)))

		if (is.null(obj$assembly))
			cat("Genome assembly: unknown\n")
		else
			cat("Genome assembly: ", obj$assembly, "\n", sep="")

		if (!is.null(obj$appendix$platform))
			cat("Platform:", obj$appendix$platform, "\n")
		if (!is.null(obj$appendix$information))
			cat("Information:", obj$appendix$information, "\n")
		if (!is.null(obj$appendix$warning))
			message(obj$appendix$warning)
	}

	rv <- list(num.classifier = length(obj$classifiers),
		num.snp = length(snpset),
		snp.id = obj$snp.id, snp.position = obj$snp.position,
		snp.hist = snp.hist, info = info)
	return(invisible(rv))
}




##########################################################################
# to finalize the HIBAG model
#

hlaPublish <- function(mobj, platform=NULL, information=NULL, warning=NULL,
	rm.unused.snp=TRUE, anonymize=TRUE, verbose=TRUE)
{
	# check
	stopifnot(inherits(mobj, "hlaAttrBagObj") |
		inherits(mobj, "hlaAttrBagClass"))
	stopifnot(is.null(platform) | is.character(platform))
	stopifnot(is.null(information) | is.character(information))
	stopifnot(is.null(warning) | is.character(warning))
	stopifnot(is.logical(rm.unused.snp) & (length(rm.unused.snp)==1))
	stopifnot(is.logical(verbose))
	if (inherits(mobj, "hlaAttrBagClass"))
		mobj <- hlaModelToObj(mobj)

	# additional information
	if (is.null(platform))
		platform <- mobj$appendix$platform
	if (is.null(information))
		information <- mobj$appendix$information
	if (is.null(warning))
		warning <- mobj$appendix$warning
	mobj$appendix <- list(
		platform=platform, information=information, warning=warning)

	# remove unused SNPs
	if (rm.unused.snp)
	{
		# get frequency of use for SNPs
		snp.hist <- rep(0, length(mobj$snp.id))
		for (i in 1:length(mobj$classifiers))
		{
			idx <- mobj$classifiers[[i]]$snpidx
			snp.hist[idx] <- snp.hist[idx] + 1
		}

		flag <- (snp.hist > 0)
		if (sum(flag) < mobj$n.snp)
		{
			if (verbose)
			{
				cnt <- mobj$n.snp - sum(flag)
				cat(sprintf("Remove %d unused SNP%s.\n", cnt, if (cnt>1) "s" else ""))
			}
		}
		mobj$n.snp <- sum(flag)
		mobj$snp.id <- mobj$snp.id[flag]
		mobj$snp.position <- mobj$snp.position[flag]
		mobj$snp.allele <- mobj$snp.allele[flag]
		mobj$snp.allele.freq <- mobj$snp.allele.freq[flag]

		idx.list <- rep(0, length(flag))
		idx.list[flag] <- 1:mobj$n.snp

		for (i in 1:length(mobj$classifiers))
		{
			mobj$classifiers[[i]]$snpidx <-
				idx.list[ mobj$classifiers[[i]]$snpidx ]
		}
	}

	# anonymize
	if (anonymize)
	{
		mobj$sample.id <- NULL
		for (i in 1:length(mobj$classifiers))
		{
			mobj$classifiers[[i]]$samp.num <- NULL
		}
	}

	# output
	return(mobj)
}


##########################################################################
# to get a model object of attribute bagging from a list of files
#

hlaModelFiles <- function(fn.list, action.missingfile=c("ignore", "stop"),
	verbose=TRUE)
{
	# check
	stopifnot(is.character(fn.list))
	stopifnot(is.logical(verbose))
	action.missingfile <- match.arg(action.missingfile)

	# for-loop
	rv <- NULL
	for (fn in fn.list)
	{
		if (file.exists(fn))
		{
			tmp <- get(load(fn))
			if (is.null(rv))
			{
				rv <- tmp
			} else {
				rv <- hlaCombineModelObj(rv, tmp)
			}
		} else {
			s <- sprintf("There is no '%s'.", fn)
			if (action.missingfile == "stop")
			{
				stop(s)
			} else {
				if (verbose) message(s)
			}
		}
	}
	rv
}


##########################################################################
# Out-of-bag estimation of overall accuracy, per-allele sensitivity, etc
#

hlaOutOfBag <- function(model, hla, snp, call.threshold=NaN, verbose=TRUE)
{
	# check
	stopifnot(inherits(model, "hlaAttrBagObj") |
		inherits(model, "hlaAttrBagClass"))
	stopifnot(inherits(hla, "hlaAlleleClass"))
	stopifnot(inherits(snp, "hlaSNPGenoClass"))

	stopifnot(is.numeric(call.threshold) & is.vector(call.threshold))
	stopifnot(length(call.threshold) == 1)

	stopifnot(is.logical(verbose) & is.vector(verbose))
	stopifnot(length(verbose) == 1)


	######################################################
	# initialize ...
	if (inherits(model, "hlaAttrBagClass"))
	{
		model <- hlaModelToObj(model)
		if (verbose) print(model)
	}

	# map samples
	if (is.null(model$sample.id))
		stop("There is no sample ID in the model.")
	samp.idx <- match(model$sample.id, snp$sample.id)
	if (any(is.na(samp.idx)))
		stop("Some of sample.id in the model do not exist in SNP genotypes.")
	hla.samp.idx <- match(model$sample.id, hla$value$sample.id)
	if (any(is.na(hla.samp.idx)))
		stop("Some of sample.id in the model do not exist in HLA types.")

	# map SNPs
	snp.idx <- match(model$snp.id, snp$snp.id)
	if (any(is.na(snp.idx)))
		stop("Some of snp.id in the model do not exist in SNP genotypes.")

	# genotypes and the number of classifiers
	geno <- snp$genotype[snp.idx, samp.idx]
	nclass <- length(model$classifiers)

	# the returned value
	ans <- NULL

	# the column names of details
	nm1 <- c("allele", "train.num", "train.freq")
	nm2 <- c("call.rate", "accuracy", "sensitivity", "specificity", "ppv", "npv")

	# for-loop
	for (i in 1:nclass)
	{
		mx <- model
		mx$classifiers <- mx$classifiers[i]
		s <- mx$classifiers[[1]]$samp.num
		if (is.null(s))
			stop("There is no bootstrap sample index.")

		tmp.model <- hlaModelFromObj(mx)
		tmp.geno <- geno[, s == 0]
		v <- predict(tmp.model, tmp.geno, verbose=FALSE)
		hlaClose(tmp.model)
		v$value$sample.id <- mx$sample.id[s == 0]
		pam <- hlaCompareAllele(hla, v, allele.limit=mx,
			call.threshold=call.threshold, verbose=FALSE)

		if (!is.null(ans))
		{
			# overall
			ans$overall <- ans$overall + pam$overall

			# confusion matrix
			ans$confusion <- ans$confusion + pam$confusion

			# details
			pam$detail <- pam$detail[, nm2]
			ans$n.detail <- ans$n.detail + !is.na(pam$detail)
			pam$detail[is.na(pam$detail)] <- 0
			ans$detail <- ans$detail + pam$detail
		} else {
			ans <- pam
			ans$detailhead <- ans$detail[, nm1]
			colnames(ans$detailhead) <- c("allele", "valid.num", "valid.freq")
			ans$detail <- ans$detail[, nm2]
			ans$n.detail <- !is.na(ans$detail)
			ans$detail[is.na(ans$detail)] <- 0
		}

		if (verbose)
		{
			cat(date(), sprintf(", passing the %d/%d classifiers.\n",
				i, nclass), sep="")
		}
	}

	# average
	ans$overall <- ans$overall / nclass
	ans$confusion <- ans$confusion / nclass
	ans$detail <- ans$detail / ans$n.detail

	# get miscall
	rv <- ans$confusion; diag(rv) <- 0
	m.max <- apply(rv, 2, max); m.idx <- apply(rv, 2, which.max)
	s <- rownames(ans$confusion)[m.idx]; s[m.max<=0] <- NA
	p <- m.max / apply(rv, 2, sum)

	# output
	ans$detail <- cbind(ans$detailhead, ans$detail,
		miscall=s, miscall.prop=p, stringsAsFactors=FALSE)
	ans$detailhead <- NULL
	ans$n.detail <- NULL
	ans
}


##########################################################################
# to create a report for evaluating accuracies
#

hlaReport <- function(object, export.fn="", type=c("txt", "tex", "html"),
	header=TRUE)
{
	# check
	stopifnot(is.list(object))
	stopifnot(is.data.frame(object$detail))
	stopifnot(is.character(export.fn))
	type <- match.arg(type)
	stopifnot(is.logical(header))

	# create an output file
	if (export.fn != "")
	{
		f <- file(export.fn, "wt")
		on.exit(close(f))
	} else {
		f <- ""
	}

	###############################################################
	# text format

	d <- data.frame(Allele=object$detail$allele, stringsAsFactors=FALSE)

	if (!is.null(object$detail$train.num))
		d$NumTrain <- object$detail$train.num
	if (!is.null(object$detail$train.freq))
	{
		d$FreqTrain <- sprintf("%0.4f", object$detail$train.freq)
		d$FreqTrain[d$NumTrain <= 0] <- "0"

		L1 <- c("Allele", "Num.", "Freq.", "Num.", "Freq.",
			"CR", "ACC", "SEN", "SPE", "PPV", "NPV", "Miscall")
		L2 <- c("", "Train", "Train", "Valid.", "Valid.",
			"(%)", "(%)", "(%)", "(%)", "(%)", "(%)", "(%)")
		L2a <- c("", "Train", "Train", "Valid.", "Valid.",
			"(\\%)", "(\\%)", "(\\%)", "(\\%)", "(\\%)", "(\\%)", "(\\%)")
	} else {
		L1 <- c("Allele", "Num.", "Freq.",
			"CR", "ACC", "SEN", "SPE", "PPV", "NPV", "Miscall")
		L2 <- c("", "Valid.", "Valid.",
			"(%)", "(%)", "(%)", "(%)", "(%)", "(%)", "(%)")
		L2a <- c("", "Valid.", "Valid.",
			"(\\%)", "(\\%)", "(\\%)", "(\\%)", "(\\%)", "(\\%)", "(\\%)")
	}

	d$NumValid <- object$detail$valid.num
	d$FreqValid <- sprintf("%0.4f", object$detail$valid.freq)
	d$FreqValid[d$NumValid <= 0] <- "0"

	d$CR <- sprintf("%0.1f", object$detail$call.rate*100)
	d$CR[object$detail$call.rate < 0.0005] <- "--"

	d$ACC <- sprintf("%0.1f", object$detail$accuracy*100)
	d$ACC[!is.finite(object$detail$accuracy)] <- "--"

	d$SEN <- sprintf("%0.1f", object$detail$sensitivity*100)
	d$SEN[!is.finite(object$detail$sensitivity)] <- "--"

	d$SPE <- sprintf("%0.1f", object$detail$specificity*100)
	d$SPE[!is.finite(object$detail$specificity)] <- "--"

	d$PPV <- sprintf("%0.1f", object$detail$ppv*100)
	d$PPV[!is.finite(object$detail$ppv)] <- "--"

	d$NPV <- sprintf("%0.1f", object$detail$npv*100)
	d$NPV[!is.finite(object$detail$npv)] <- "--"

	d$Miscall <- sprintf("%s (%0.0f)",
		object$detail$miscall, object$detail$miscall.prop*100)
	d$Miscall[is.na(object$detail$miscall) | !is.finite(object$detail$miscall.prop)] <- "--"


	if (type == "txt")
	{
		cat(L1, file=f, sep="\t", append=TRUE)
		cat("\n", file=f, append=TRUE)
		cat(L2, file=f, sep="\t", append=TRUE)
		cat("\n", file=f, append=TRUE)
		cat("----\n", file=f, append=TRUE)
		cat(sprintf("Overall accuracy: %0.1f%%, Call rate: %0.1f%%\n",
			object$overall$acc.haplo*100, object$overall$call.rate*100),
			file=f, append=TRUE)
		write.table(d, file=f, append=TRUE, quote=FALSE, sep="\t",
			row.names=FALSE, col.names=FALSE)

	} else if (type == "tex")
	{
		if (header)
		{
			cat("\\title{Imputation Evaluation}", "",
				"\\documentclass[12pt]{article}", "",
				"\\usepackage{fullpage}",
				"\\usepackage{longtable}", "",
				"\\begin{document}", "",
				"\\maketitle", "",
				"\\setlength{\\LTcapwidth}{6.4in}", "",
				file=f, append=TRUE, sep="\n")
		}

		cat("% -------- BEGIN TABLE --------\n", file=f, append=TRUE)
		if (!is.null(object$detail$train.freq))
		{
			cat("\\begin{longtable}{rrrrr | rrrrrrl}\n", file=f, append=TRUE)
		} else {
			cat("\\begin{longtable}{rrr | rrrrrrl}\n", file=f, append=TRUE)
		}
		cat(c("\\caption{The sensitivity (SEN), specificity (SPE),",
			"positive predictive value (PPV), negative predictive value",
			"(NPV) and call rate (CR)}\n"), file=f, append=TRUE, sep=" ")
		cat("\\label{tab:accuracy} \\\\\n", file=f, append=TRUE)

		cat(L1, file=f, sep=" & ", append=TRUE)
		cat(" \\\\\n", file=f, append=TRUE)
		cat(L2a, file=f, sep=" & ", append=TRUE)
		cat(" \\\\\n", file=f, append=TRUE)
		cat("\\hline\\hline\n\\endfirsthead\n", file=f, append=TRUE)

		if (!is.null(object$detail$train.freq))
		{
			cat("\\multicolumn{12}{c}{{\\normalsize \\tablename\\ \\thetable{} -- Continued from previous page}} \\\\\n",
				file=f, append=TRUE)
		} else {
			cat("\\multicolumn{10}{c}{{\\normalsize \\tablename\\ \\thetable{} -- Continued from previous page}} \\\\\n",
				file=f, append=TRUE)
		}

		cat(L1, file=f, sep=" & ", append=TRUE)
		cat(" \\\\\n", file=f, append=TRUE)
		cat(L2a, file=f, sep=" & ", append=TRUE)
		cat(" \\\\\n", file=f, append=TRUE)
		cat("\\hline\\hline\n\\endhead\n\\hline\n", file=f, append=TRUE)

		if (!is.null(object$detail$train.freq))
		{
			cat("\\multicolumn{12}{r}{Continued on next page ...} \\\\\n",
				file=f, append=TRUE)
		} else {
			cat("\\multicolumn{10}{r}{Continued on next page ...} \\\\\n",
				file=f, append=TRUE)
		}

		cat("\\hline\n\\endfoot\n\\hline\\hline\n\\endlastfoot\n",
			file=f, append=TRUE)

		if (!is.null(object$detail$train.freq))
		{
			cat(sprintf(
				"\\multicolumn{12}{l}{\\it Overall accuracy: %0.1f\\%%, Call rate: %0.1f\\%%} \\\\\n",
				object$overall$acc.haplo*100, object$overall$call.rate*100),
				file=f, append=TRUE)
		} else {
			cat(sprintf(
				"\\multicolumn{10}{l}{\\it Overall accuracy: %0.1f\\%%, Call rate: %0.1f\\%%} \\\\\n",
				object$overall$acc.haplo*100, object$overall$call.rate*100),
				file=f, append=TRUE)
		}

		write.table(d, file=f, append=TRUE, quote=FALSE, sep=" & ",
			row.names=FALSE, col.names=FALSE, eol=" \\\\\n")

		cat("\\end{longtable}\n% -------- END TABLE --------\n",
			file=f, append=TRUE)

		if (header)
		{
			cat("\n\\end{document}\n", file=f, append=TRUE)
		}
	} else if (type == "html")
	{
		if (header)
		{
			cat("<!DOCTYPE html>",
				"<html>",
				"<head>",
				"  <title>Imputation Evaluation</title>",
				"</head>",
				"<body>",
				file=f, append=TRUE, sep="\n")
		}

		cat("<h1>Imputation Evaluation</h1>",
			"<p></p>",
			"<h3><b>Table 1:</b> The sensitivity (SEN), specificity (SPE), positive predictive value (PPV), negative predictive value (NPV) and call rate (CR).</h3>",
			file=f, append=TRUE, sep="\n")

		cat("<table id=\"TB-Acc\" class=\"tabular\" border=\"1\"  CELLSPACING=\"1\">",
			"<tr>", 
				paste(paste("<th>", L1, " ", L2, "</th>", sep=""), collapse=" "),
			"</tr>",
			"<tr>",
			paste("<td colspan=\"", length(L1), "\">", sep=""),
			sprintf("<i> Overall accuracy: %0.1f%%, Call rate: %0.1f%% </i>",
				object$overall$acc.haplo*100, object$overall$call.rate*100),
			"</td>",
			"</tr>",
			file=f, append=TRUE, sep="\n")

		for (i in 1:nrow(d))
		{
			cat("<tr>", 
				paste(paste("<td>", d[i, ], "</td>", sep=""), collapse=" "),
				"</tr>",
				file=f, append=TRUE, sep="\n")
		}

		cat("</table>\n", file=f, append=TRUE)

		if (header)
		{
			cat("\n</body>\n</html>\n", file=f, append=TRUE)
		}
	}

	invisible()
}


##########################################################################
##########################################################################
#
# Linkage Disequilibrium
#

##########################################################################
# To calculate linkage disequilibrium between HLA locus and SNP markers
#

hlaGenoLD <- function(hla, geno)
{
	# check
	stopifnot(inherits(hla, "hlaAlleleClass"))
	if (inherits(geno, "hlaSNPGenoClass"))
	{
		stopifnot(dim(hla$value)[1] == length(geno$sample.id))
		if (any(hla$value$sample.id != geno$sample.id))
		{
			hla <- hlaAlleleSubset(hla, samp.sel =
				match(geno$sample.id, hla$value$sample.id))
		}
		geno <- geno$genotype
	} else if (is.matrix(geno))
	{
		stopifnot(is.numeric(geno))
		stopifnot(dim(hla$value)[1] == dim(geno)[2])
	} else if (is.vector(geno))
	{
		stopifnot(is.numeric(geno))
		stopifnot(dim(hla$value)[1] == length(geno))
		geno <- matrix(geno, ncol=1)
	} else {
		stop("geno should be `hlaSNPGenoClass', a vector or a matrix.")
	}

	# HLA alleles indicators
	alleles <- unique(c(hla$value$allele1, hla$value$allele2))
	alleles <- alleles[order(alleles)]
	allele.mat <- matrix(as.integer(0),
		nrow=length(hla$value$allele1), ncol=length(alleles))
	for (i in 1:length(alleles))
	{
		allele.mat[, i] <- (hla$value$allele1==alleles[i]) +
			(hla$value$allele2==alleles[i])
	}

	apply(geno, 1,
		function(x, allele.mat) {
			suppressWarnings(mean(cor(x, allele.mat, use="pairwise.complete.obs")^2, na.rm=TRUE))
		},
		allele.mat=allele.mat)
}




##########################################################################
##########################################################################
#
# Visualization
#

##########################################################################
# To visualize an attribute bagging model
#

plot.hlaAttrBagClass <- function(x, ...)
{
	obj <- hlaModelToObj(x)
	plot(obj, ...)
}

print.hlaAttrBagClass <- function(x, ...)
{
	obj <- hlaModelToObj(x)
	print(obj)
	invisible()
}



##########################################################################
# To visualize an attribute bagging model
#

plot.hlaAttrBagObj <- function(x, xlab=NULL, ylab=NULL,
	locus.color="red", locus.lty=2, locus.cex=1.25,
	assembly=c("auto", "hg18", "hg19", "unknown"), ...)
{
	# check
	stopifnot(inherits(x, "hlaAttrBagObj"))
	assembly <- match.arg(assembly)

	# the starting and ending positions of HLA locus
	if (assembly == "auto")
	{
		if (!is.null(x$assembly))
			assembly <- x$assembly
	}
	info <- hlaLociInfo(assembly)
	pos.start <- info$pos.HLA.start[[x$hla.locus]]/1000
	pos.end <- info$pos.HLA.end[[x$hla.locus]]/1000

	# summary of the attribute bagging model
	desp <- summary(x, show=FALSE)

	# x - label, y - label
	if (is.null(xlab)) xlab <- "SNP Position (KB)"
	if (is.null(ylab)) ylab <- "Frequency of Use"

	# draw
	plot(x$snp.position/1000, desp$snp.hist, xlab=xlab, ylab=ylab, ...)
	abline(v=pos.start, col=locus.color, lty=locus.lty)
	abline(v=pos.end, col=locus.color, lty=locus.lty)
	text((pos.start + pos.end)/2, max(desp$snp.hist), paste("HLA", x$hla.locus, sep="-"),
		col=locus.color, cex=locus.cex)
}

print.hlaAttrBagObj <- function(x, ...)
{
	summary(x)
	invisible()
}





#######################################################################
# To get the resources of HIBAG models
#

hlaResource <- function()
{
	read.table(
		"http://dl.dropboxusercontent.com/u/51499461/HIBAG/ResourceList.txt",
		header=TRUE, stringsAsFactors=FALSE, sep="\t",
		colClasses="character")
}



#######################################################################
# To get the error message
#

hlaErrMsg <- function()
{
	.Call(HIBAG_ErrMsg)
}



#######################################################################
# Internal R library functions
#######################################################################

.onAttach <- function(lib, pkg)
{
	# initialize HIBAG
	SSE.Flag <- .Call(HIBAG_Init)

	# information
	packageStartupMessage(
		"HIBAG (HLA Genotype Imputation with Attribute Bagging): v1.2.5")
	if (SSE.Flag == 1)
		s <- "Supported by Streaming SIMD Extensions (SSE2)"
	else if (SSE.Flag == 2)
		s <- "Supported by Streaming SIMD Extensions (SSE4.2 + hardware POPCNT)"
	else
		s <- ""
	if (s != "") packageStartupMessage(s)

	TRUE
}

.Last.lib <- function(libpath)
{
	# finalize HIBAG
	.Call(HIBAG_Done)
	TRUE
}