Nothing
# A function to read geno packedancestrymap files
gen_tibble_packedancestry <- function(
x,
...,
valid_alleles = c("A", "T", "C", "G"),
missing_alleles = c("0", "."),
backingfile = NULL,
quiet = FALSE) {
# Substitute .geno with .snp
map_file <- sub("\\.geno$", ".snp", x)
if (!file.exists(map_file)) {
stop("snp file ", map_file, " does not exist")
}
ind_file <- sub("\\.geno$", ".ind", x)
if (!file.exists(ind_file)) {
stop("ind file ", ind_file, " does not exist")
}
if (is.null(backingfile)) {
backingfile <- sub("\\.geno$", "", x)
}
if (file_ext(backingfile) == "bk") {
backingfile <- bigstatsr::sub_bk(backingfile, "")
}
# read the packed ancestry in chunks
# count the individuals
indiv_table <- utils::read.table(ind_file, header = FALSE)
names(indiv_table) <- c("id", "sex", "population")
no_individuals <- nrow(indiv_table)
# count the loci
loci_table <- utils::read.table(map_file, header = FALSE)
names(loci_table) <- c(
"name",
"chromosome",
"genetic_dist",
"position",
"allele_ref",
"allele_alt"
)
loci_table$allele_alt[loci_table$allele_alt == "X"] <- NA
no_variants <- nrow(loci_table)
# verify that these numbers are compatible with the geno file
conn <- file(x, "rb")
hd <- strsplit(readBin(conn, "character", n = 1), " +")[[1]]
close(conn)
if (no_individuals != as.numeric(hd[2])) {
stop(
"Number of individuals in geno file does not match the number of ",
"individuals in the ind file"
)
}
if (no_variants != as.numeric(hd[3])) {
stop(
"Number of variants in geno file does not match the number of ",
"variants in the snp file"
)
}
# create a matrix to store the data
file_backed_matrix <- bigstatsr::FBM.code256(
nrow = no_individuals,
ncol = no_variants,
code = bigsnpr::CODE_012,
backingfile = backingfile
)
# Fill the FBM from bedfile
reach_eof <- read_packedancestry(x, file_backed_matrix,
tab = get_packedancestry_code()
)
if (!reach_eof) warning("EOF of bedfile has not been reached.")
# save the fbm
file_backed_matrix$save()
# convert the info from the indiv and loci table
fam <- tibble(
family.ID = indiv_table$population,
sample.ID = indiv_table$id,
paternal.ID = 0,
maternal.ID = 0,
sex = dplyr::case_when(
indiv_table$sex %in% c("M", "male", 1) ~ 1L,
indiv_table$sex %in% c("F", "female", 2) ~ 2L,
TRUE ~ 0L
),
affection = 0,
ploidy = 2
)
map <- tibble(
chromosome = loci_table$chromosome,
marker.ID = loci_table$name,
genetic.dist = loci_table$genetic_dist,
physical.pos = loci_table$position,
# note the swap below due to the different conventions in plink vs
# packed ancestry
allele1 = loci_table$allele_alt,
allele2 = loci_table$allele_ref
)
bigsnp_obj <- structure(
list(
genotypes = file_backed_matrix,
fam = fam,
map = map
),
class = "bigSNP"
)
bigsnp_obj <- bigsnpr::snp_save(bigsnp_obj)
gen_tibble(
bigsnp_obj$genotypes$rds,
valid_alleles = valid_alleles,
missing_alleles = missing_alleles,
backingfile = backingfile,
quiet = TRUE
) # silence it as output will be generated by the parent function
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.