R/hss.R
In hsphase: Phasing, Pedigree Reconstruction, Sire Imputation and Recombination Events Identification of Half-sib Families Using SNP Data

Documented in hss readGenotype

readGenotype <- function(genotypePath, separatorGenotype = " ", check = TRUE)
{
	debut <- proc.time()
	print("Reading the genotype file ...")
	nline <- readLines(genotypePath, 2)
	if (check) 
		if (length(unlist(strsplit(nline[1], split = separatorGenotype))) != (length(unlist(strsplit(nline[2], split = separatorGenotype)))) - 1)
		{
			print(paste("Line 1:", length(unlist(strsplit(nline[1], split = separatorGenotype)))))
			print(paste("Line 2:", length(unlist(strsplit(nline[2], split = separatorGenotype)))))
			print("The first line must have one element less than line two, possible reason: ")
			stop("The animal IDs must not have a column name (The first line must only contain SNPs' names)")
		}
	ncols <- length(unlist(strsplit(nline[2], split = separatorGenotype)))
	if (check) 
		if (ncols < 101)
		{
			stop("The genotype file must have more than 100 columns.")
		}
	dataTemp <- scan(genotypePath, what = c("character", rep("numeric", ncols - 1)), skip = 1, , sep = separatorGenotype)
	index <- seq(from = 1, to = length(dataTemp), by = ncols)
	indexneg <- setdiff(1:length(dataTemp), index)
	if (check) 
		if (length(indexneg)%%(ncols - 1) != 0) 
			stop("All rows must have the same number of elements (The separator may set incorrectly)")
	genotype <- matrix(as.integer(dataTemp[indexneg]), ncol = (ncols - 1), byrow = T)
	rownames(genotype) <- dataTemp[index]
	colnames(genotype) <- unlist(strsplit(nline[1], split = separatorGenotype))
	if (check) 
		if (any(is.na(genotype))) 
			stop("Genotype must contain only integer data (After excluding SNP namse and IDs")
	if (check) 
		if (length(genotype[genotype != 9 & genotype != 0 & genotype != 2 & genotype != 1]) > 0) 
			stop("Genotype must contain only 0, 1, 2 or 9")
	if (check) 
		if (nrow(genotype) < 4)
		{
			stop("The genotype file must have more than 4 rows")
		}
	print(proc.time() - debut)
	print("Finished!")
	genotype
}

hss <- function(pedigree, genotype, check = TRUE)
{
	debut <- proc.time()
	
	if (check) 
	{
		if(length(rownames(genotype)[duplicated(rownames(genotype))])>0)
		{
			stop("There are duplicated ID in the input file")
		}
		if (any(is.na(genotype))) 
			stop("Genotype must contain only integer data (After excluding SNP namse and IDs")
		if (length(genotype[genotype != 9 & genotype != 0 & genotype != 2 & genotype != 1]) > 0) 
			stop("Genotype must contain only 0, 1, 2 or 9")
		if (nrow(genotype) < 4)
		{
			stop("The genotype file must have more than 4 rows")
		}
		if (ncol(pedigree) < 2) 
			stop("Pedigree must have at least 2 columns")
	}
	pedigree <- as.matrix(pedigree[, 1:2])
	pedigree <- pedigree[!(pedigree[, 2] == ""), ]
	pedigree <- pedigree[pedigree[, 2] != 0, ]
	pedigree <- pedigree[pedigree[, 1] %in% rownames(genotype), ]
	## sires <- aggregate(pedigree[, 1], by = list(pedigree[, 2]), function(x) x, simplify = F)
	sires <- tapply(pedigree[, 1], pedigree[, 2], function(x) x, simplify = F) # Faster
	nhalfsibs <- as.integer(lapply(sires, length))
	sires <- sires[which(nhalfsibs > 3)]
	gc()
	print(paste("There are", nrow(sires), "half-sib groups (Based on the pedigree and genotype)"))
	halfsib <- list(nrow(sires))
	genotype <- as.matrix(genotype)
	if (check) 
		if (length(genotype[genotype != 9 & genotype != 0 & genotype != 2 & genotype != 1]) > 0) 
			stop("GenotypeMatrix must contain only 0, 1, 2 or 9")
	ncols <- numeric(nrow(genotype))
	print("Half-sib groups separator")
	for (i in 1:nrow(sires))
	{
		print(paste(i, nrow(sires)))
		halfsib[[i]] <- as.matrix(genotype[which(rownames(genotype) %in% as.character(as.vector(sires[[i]]))), ])
		names(halfsib)[i] <- names(sires)[i]
		if (ncol(halfsib[[i]]) == 1) 
			halfsib[[i]] <- t(halfsib[[i]])
		halfsib[[i]] <- na.omit(halfsib[[i]])
		ncols[i] <- ncol(halfsib[[i]])
	}
	halfsib[lapply(halfsib, nrow) < 4 | lapply(halfsib, ncol) < 100] <- NULL
	print(paste("There are", length(halfsib), "half-sib groups"))
	print(proc.time() - debut)
	print("Finished!")
	gc()
	halfsib
}

Any scripts or data that you put into this service are public.

hsphase documentation built on April 12, 2025, 1:13 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

hsphase
Phasing, Pedigree Reconstruction, Sire Imputation and Recombination Events Identification of Half-sib Families Using SNP Data

R/hss.R
In hsphase: Phasing, Pedigree Reconstruction, Sire Imputation and Recombination Events Identification of Half-sib Families Using SNP Data

Defines functions hss readGenotype

Documented in hss readGenotype

Try the hsphase package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hsphase Phasing, Pedigree Reconstruction, Sire Imputation and Recombination Events Identification of Half-sib Families Using SNP Data

R/hss.R In hsphase: Phasing, Pedigree Reconstruction, Sire Imputation and Recombination Events Identification of Half-sib Families Using SNP Data

Defines functions hss readGenotype

Documented in hss readGenotype

Try the hsphase package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hsphase
Phasing, Pedigree Reconstruction, Sire Imputation and Recombination Events Identification of Half-sib Families Using SNP Data

R/hss.R
In hsphase: Phasing, Pedigree Reconstruction, Sire Imputation and Recombination Events Identification of Half-sib Families Using SNP Data