Nothing
readVCFchunk <- function(vcffile, samplenames=FALSE){
### DIPLOID DATA
# if(file.exists("SNPRObjects")){unlink("SNPRObjects",recursive=TRUE)}
# Get the sample names !
vcf_handle <- file(vcffile)
open(vcf_handle)
while(1){
line <- readLines(vcf_handle,n=1)
patt <- substr(line,1,2)
if(patt!="##"){break}
}
line <- strsplit(line,"\t")[[1]]
individuals <- line[10:length(line)]
close(vcf_handle)
#
# End of get individuals
# specify colClasses
spec <- c("NULL","integer","NULL","character",
"character", rep("NULL",4),rep("character",length(individuals)))
vcf <- read.table(vcffile, sep="\t", colClasses=spec)
# Save infos from vcf chunk
pos <- vcf[,1]
ref <- vcf[,2]
alt <- vcf[,3]
ind <- vcf[,4:dim(vcf)[2]]
# FIRST HAPLOTYPE MATRIX !!! #############################################################################
ind_MATRIX <- matrix(,dim(ind)[2],dim(ind)[1])
# fill individual matrix
op <- options(warn = (-1)) # keine Warnmeldungen !
for( xx in 1:dim(ind)[2] ){
ind_MATRIX[xx,] <- as.integer(substr(ind[,xx], 1, 1)) # 1/1
}
options(op) # Warnmeldungen wieder an !
ind_MATRIX <- ind_MATRIX + 1
# create ALT MATRIX
ALTlist <- strsplit(alt,",")
n.val <- sapply(ALTlist,function(x){return(length(x))})
alt_MATRIX <- matrix(,length(pos),max(n.val))
for(xx in 1:length(ALTlist)){
alt_MATRIX[xx,1:length(ALTlist[[xx]])] <- ALTlist[[xx]]
}
# MAP Matrix
MAP_MATRIX <- cbind(ref,alt_MATRIX)
# Create Coding
Code_matrix <- matrix(,dim(ind_MATRIX)[1], length(pos))
for(xx in 1:dim(MAP_MATRIX)[1]){
Code_matrix[,xx] <- MAP_MATRIX[xx,][ind_MATRIX[,xx]]
}
Code_matrix <- .Call("code_nucs",Code_matrix)
# ----------------------------------------------
# Set the rownames
if(samplenames[1]!=FALSE){
rownames(Code_matrix) <- samplenames
}else{
rownames(Code_matrix) <- individuals
}
HAPMATRIX1 <- Code_matrix
rm(Code_matrix)
### END OF FIRST HAPLOTYPE MATRIX !!! ##############################################################
### SECOND HAPLOTYPE MATRIX ########################################################################
ind_MATRIX <- matrix(,dim(ind)[2],dim(ind)[1])
# fill individual matrix
op <- options(warn = (-1)) # keine Warnmeldungen !
for( xx in 1:dim(ind)[2] ){
ind_MATRIX[xx,] <- as.integer(substr(ind[,xx], 3, 3)) # 1/1
}
options(op) # Warnmeldungen wieder an !
ind_MATRIX <- ind_MATRIX + 1
Code_matrix <- matrix(,dim(ind_MATRIX)[1], length(pos))
for(xx in 1:dim(MAP_MATRIX)[1]){
Code_matrix[,xx] <- MAP_MATRIX[xx,][ind_MATRIX[,xx]]
}
Code_matrix <- .Call("code_nucs",Code_matrix)
gc()
# ----------------------------------------------
# Set the rownames
if(samplenames[1]!=FALSE){
rownames(Code_matrix) <- paste(samplenames,".2",sep="")
}else{
rownames(Code_matrix) <- paste(individuals,".2",sep="")
}
HAPMATRIX2 <- Code_matrix
### END OF SECOND HAPLOTYPE MATRIX ################################################################
Code_matrix <- rbind(HAPMATRIX1,HAPMATRIX2)
rm(HAPMATRIX1)
rm(HAPMATRIX2)
gc()
# modify row Order
NN <- rownames(Code_matrix)
names(NN) <- 1:length(NN)
NN <- sort(NN)
ids <- as.integer(names(NN))
Code_matrix <- Code_matrix[ids,,drop=FALSE]
names(NN) <- NULL
rownames(Code_matrix) <- NN
# ---------------
o_b_j_sub <- list(matrix=Code_matrix,reference=NaN,positions=pos)
return(o_b_j_sub)
}
# END DIPLOID DATA
# Outsource data # ---------------------------------------
# dir.create("SNPRObjects")
# o_b_j_sub <- list(matrix=Code_matrix,reference=NaN,positions=pos)
# save(o_b_j_sub,file=file.path(getwd(),"SNPRObjects",paste("chr",sep="")))
#---------------------------------------------------------
# path <- file.path(getwd(),"SNPRObjects")
# res <- readData(path,format="RData",big.data=TRUE,SNP.DATA=TRUE)
#unlink("SNPRObjects", recursive=TRUE)
#return(res)
#}
###############################################################################################################
readVCFchunk_tet <- function(vcffile, samplenames=FALSE){
### Tetraploid DATA
# if(file.exists("SNPRObjects")){unlink("SNPRObjects",recursive=TRUE)}
# Get the sample names !
vcf_handle <- file(vcffile)
open(vcf_handle)
while(1){
line <- readLines(vcf_handle,n=1)
patt <- substr(line,1,2)
if(patt!="##"){break}
}
line <- strsplit(line,"\t")[[1]]
individuals <- line[10:length(line)]
close(vcf_handle)
#
# End of get individuals
# specify colClasses
spec <- c("NULL","integer","NULL","character",
"character", rep("NULL",4),rep("character",length(individuals)))
vcf <- read.table(vcffile, sep="\t", colClasses=spec)
# Save infos from vcf chunk
pos <- vcf[,1]
ref <- vcf[,2]
alt <- vcf[,3]
ind <- vcf[,4:dim(vcf)[2]]
# FIRST HAPLOTYPE MATRIX !!! #############################################################################
ind_MATRIX <- matrix(,dim(ind)[2],dim(ind)[1])
# fill individual matrix
op <- options(warn = (-1)) # keine Warnmeldungen !
for( xx in 1:dim(ind)[2] ){
ind_MATRIX[xx,] <- as.integer(substr(ind[,xx], 1, 1)) # 1/1
}
options(op) # Warnmeldungen wieder an !
ind_MATRIX <- ind_MATRIX + 1
# create ALT MATRIX
ALTlist <- strsplit(alt,",")
n.val <- sapply(ALTlist,function(x){return(length(x))})
alt_MATRIX <- matrix(,length(pos),max(n.val))
for(xx in 1:length(ALTlist)){
alt_MATRIX[xx,1:length(ALTlist[[xx]])] <- ALTlist[[xx]]
}
# MAP Matrix
MAP_MATRIX <- cbind(ref,alt_MATRIX)
# Create Coding
Code_matrix <- matrix(,dim(ind_MATRIX)[1], length(pos))
for(xx in 1:dim(MAP_MATRIX)[1]){
Code_matrix[,xx] <- MAP_MATRIX[xx,][ind_MATRIX[,xx]]
}
Code_matrix <- .Call("code_nucs",Code_matrix)
# ----------------------------------------------
# Set the rownames
if(samplenames[1]!=FALSE){
rownames(Code_matrix) <- samplenames
}else{
rownames(Code_matrix) <- individuals
}
HAPMATRIX1 <- Code_matrix
rm(Code_matrix)
### END OF FIRST HAPLOTYPE MATRIX !!! ##############################################################
### SECOND HAPLOTYPE MATRIX ########################################################################
ind_MATRIX <- matrix(,dim(ind)[2],dim(ind)[1])
# fill individual matrix
op <- options(warn = (-1)) # keine Warnmeldungen !
for( xx in 1:dim(ind)[2] ){
ind_MATRIX[xx,] <- as.integer(substr(ind[,xx], 3, 3)) # 1/1
}
options(op) # Warnmeldungen wieder an !
ind_MATRIX <- ind_MATRIX + 1
Code_matrix <- matrix(,dim(ind_MATRIX)[1], length(pos))
for(xx in 1:dim(MAP_MATRIX)[1]){
Code_matrix[,xx] <- MAP_MATRIX[xx,][ind_MATRIX[,xx]]
}
Code_matrix <- .Call("code_nucs",Code_matrix)
gc()
# ----------------------------------------------
# Set the rownames
if(samplenames[1]!=FALSE){
rownames(Code_matrix) <- paste(samplenames,".2",sep="")
}else{
rownames(Code_matrix) <- paste(individuals,".2",sep="")
}
HAPMATRIX2 <- Code_matrix
### END OF SECOND HAPLOTYPE MATRIX ################################################################
### THIRD HAPLOTYPE MATRIX ########################################################################
ind_MATRIX <- matrix(,dim(ind)[2],dim(ind)[1])
# fill individual matrix
op <- options(warn = (-1)) # keine Warnmeldungen !
for( xx in 1:dim(ind)[2] ){
ind_MATRIX[xx,] <- as.integer(substr(ind[,xx], 5, 5)) # 1/1
}
options(op) # Warnmeldungen wieder an !
ind_MATRIX <- ind_MATRIX + 1
Code_matrix <- matrix(,dim(ind_MATRIX)[1], length(pos))
for(xx in 1:dim(MAP_MATRIX)[1]){
Code_matrix[,xx] <- MAP_MATRIX[xx,][ind_MATRIX[,xx]]
}
Code_matrix <- .Call("code_nucs",Code_matrix)
gc()
# ----------------------------------------------
# Set the rownames
if(samplenames[1]!=FALSE){
rownames(Code_matrix) <- paste(samplenames,".3",sep="")
}else{
rownames(Code_matrix) <- paste(individuals,".3",sep="")
}
HAPMATRIX3 <- Code_matrix
### END OF THIRD HAPLOTYPE MATRIX ################################################################
### FOURTH HAPLOTYPE MATRIX ########################################################################
ind_MATRIX <- matrix(,dim(ind)[2],dim(ind)[1])
# fill individual matrix
op <- options(warn = (-1)) # keine Warnmeldungen !
for( xx in 1:dim(ind)[2] ){
ind_MATRIX[xx,] <- as.integer(substr(ind[,xx], 7, 7)) # 1/1
}
options(op) # Warnmeldungen wieder an !
ind_MATRIX <- ind_MATRIX + 1
Code_matrix <- matrix(,dim(ind_MATRIX)[1], length(pos))
for(xx in 1:dim(MAP_MATRIX)[1]){
Code_matrix[,xx] <- MAP_MATRIX[xx,][ind_MATRIX[,xx]]
}
Code_matrix <- .Call("code_nucs",Code_matrix)
gc()
# ----------------------------------------------
# Set the rownames
if(samplenames[1]!=FALSE){
rownames(Code_matrix) <- paste(samplenames,".4",sep="")
}else{
rownames(Code_matrix) <- paste(individuals,".4",sep="")
}
HAPMATRIX4 <- Code_matrix
### END OF FOURTH HAPLOTYPE MATRIX ################################################################
Code_matrix <- rbind(HAPMATRIX1,HAPMATRIX2,HAPMATRIX3,HAPMATRIX4)
rm(HAPMATRIX1)
rm(HAPMATRIX2)
rm(HAPMATRIX3)
rm(HAPMATRIX4)
gc()
# modify row Order
NN <- rownames(Code_matrix)
names(NN) <- 1:length(NN)
NN <- sort(NN)
ids <- as.integer(names(NN))
Code_matrix <- Code_matrix[ids,,drop=FALSE]
names(NN) <- NULL
rownames(Code_matrix) <- NN
# ---------------
o_b_j_sub <- list(matrix=Code_matrix,reference=NaN,positions=pos)
return(o_b_j_sub)
}
# Outsource data # ---------------------------------------
# dir.create("SNPRObjects")
# o_b_j_sub <- list(matrix=Code_matrix,reference=NaN,positions=pos)
# save(o_b_j_sub,file=file.path(getwd(),"SNPRObjects",paste("chr",sep="")))
#---------------------------------------------------------
# path <- file.path(getwd(),"SNPRObjects")
# res <- readData(path,format="RData",big.data=TRUE,SNP.DATA=TRUE)
#unlink("SNPRObjects", recursive=TRUE)
#return(res)
#}
#################################################################################################END OF TETRAPLOID DATA
readVCFchunk_tri <- function(vcffile, samplenames=FALSE){
### Tetraploid DATA
# if(file.exists("SNPRObjects")){unlink("SNPRObjects",recursive=TRUE)}
# Get the sample names !
vcf_handle <- file(vcffile)
open(vcf_handle)
while(1){
line <- readLines(vcf_handle,n=1)
patt <- substr(line,1,2)
if(patt!="##"){break}
}
line <- strsplit(line,"\t")[[1]]
individuals <- line[10:length(line)]
close(vcf_handle)
#
# End of get individuals
# specify colClasses
spec <- c("NULL","integer","NULL","character",
"character", rep("NULL",4),rep("character",length(individuals)))
vcf <- read.table(vcffile, sep="\t", colClasses=spec)
# Save infos from vcf chunk
pos <- vcf[,1]
ref <- vcf[,2]
alt <- vcf[,3]
ind <- vcf[,4:dim(vcf)[2]]
# FIRST HAPLOTYPE MATRIX !!! #############################################################################
ind_MATRIX <- matrix(,dim(ind)[2],dim(ind)[1])
# fill individual matrix
op <- options(warn = (-1)) # keine Warnmeldungen !
for( xx in 1:dim(ind)[2] ){
ind_MATRIX[xx,] <- as.integer(substr(ind[,xx], 1, 1)) # 1/1
}
options(op) # Warnmeldungen wieder an !
ind_MATRIX <- ind_MATRIX + 1
# create ALT MATRIX
ALTlist <- strsplit(alt,",")
n.val <- sapply(ALTlist,function(x){return(length(x))})
alt_MATRIX <- matrix(,length(pos),max(n.val))
for(xx in 1:length(ALTlist)){
alt_MATRIX[xx,1:length(ALTlist[[xx]])] <- ALTlist[[xx]]
}
# MAP Matrix
MAP_MATRIX <- cbind(ref,alt_MATRIX)
# Create Coding
Code_matrix <- matrix(,dim(ind_MATRIX)[1], length(pos))
for(xx in 1:dim(MAP_MATRIX)[1]){
Code_matrix[,xx] <- MAP_MATRIX[xx,][ind_MATRIX[,xx]]
}
Code_matrix <- .Call("code_nucs",Code_matrix)
# ----------------------------------------------
# Set the rownames
if(samplenames[1]!=FALSE){
rownames(Code_matrix) <- samplenames
}else{
rownames(Code_matrix) <- individuals
}
HAPMATRIX1 <- Code_matrix
rm(Code_matrix)
### END OF FIRST HAPLOTYPE MATRIX !!! ##############################################################
### SECOND HAPLOTYPE MATRIX ########################################################################
ind_MATRIX <- matrix(,dim(ind)[2],dim(ind)[1])
# fill individual matrix
op <- options(warn = (-1)) # keine Warnmeldungen !
for( xx in 1:dim(ind)[2] ){
ind_MATRIX[xx,] <- as.integer(substr(ind[,xx], 3, 3)) # 1/1
}
options(op) # Warnmeldungen wieder an !
ind_MATRIX <- ind_MATRIX + 1
Code_matrix <- matrix(,dim(ind_MATRIX)[1], length(pos))
for(xx in 1:dim(MAP_MATRIX)[1]){
Code_matrix[,xx] <- MAP_MATRIX[xx,][ind_MATRIX[,xx]]
}
Code_matrix <- .Call("code_nucs",Code_matrix)
gc()
# ----------------------------------------------
# Set the rownames
if(samplenames[1]!=FALSE){
rownames(Code_matrix) <- paste(samplenames,".2",sep="")
}else{
rownames(Code_matrix) <- paste(individuals,".2",sep="")
}
HAPMATRIX2 <- Code_matrix
### END OF SECOND HAPLOTYPE MATRIX ################################################################
### THIRD HAPLOTYPE MATRIX ########################################################################
ind_MATRIX <- matrix(,dim(ind)[2],dim(ind)[1])
# fill individual matrix
op <- options(warn = (-1)) # keine Warnmeldungen !
for( xx in 1:dim(ind)[2] ){
ind_MATRIX[xx,] <- as.integer(substr(ind[,xx], 5, 5)) # 1/1
}
options(op) # Warnmeldungen wieder an !
ind_MATRIX <- ind_MATRIX + 1
Code_matrix <- matrix(,dim(ind_MATRIX)[1], length(pos))
for(xx in 1:dim(MAP_MATRIX)[1]){
Code_matrix[,xx] <- MAP_MATRIX[xx,][ind_MATRIX[,xx]]
}
Code_matrix <- .Call("code_nucs",Code_matrix)
gc()
# ----------------------------------------------
# Set the rownames
if(samplenames[1]!=FALSE){
rownames(Code_matrix) <- paste(samplenames,".3",sep="")
}else{
rownames(Code_matrix) <- paste(individuals,".3",sep="")
}
HAPMATRIX3 <- Code_matrix
### END OF THIRD HAPLOTYPE MATRIX ################################################################
Code_matrix <- rbind(HAPMATRIX1,HAPMATRIX2,HAPMATRIX3)
rm(HAPMATRIX1)
rm(HAPMATRIX2)
rm(HAPMATRIX3)
gc()
# modify row Order
NN <- rownames(Code_matrix)
names(NN) <- 1:length(NN)
NN <- sort(NN)
ids <- as.integer(names(NN))
Code_matrix <- Code_matrix[ids,,drop=FALSE]
names(NN) <- NULL
rownames(Code_matrix) <- NN
# ---------------
o_b_j_sub <- list(matrix=Code_matrix,reference=NaN,positions=pos)
return(o_b_j_sub)
}
# Outsource data # ---------------------------------------
# dir.create("SNPRObjects")
# o_b_j_sub <- list(matrix=Code_matrix,reference=NaN,positions=pos)
# save(o_b_j_sub,file=file.path(getwd(),"SNPRObjects",paste("chr",sep="")))
#---------------------------------------------------------
# path <- file.path(getwd(),"SNPRObjects")
# res <- readData(path,format="RData",big.data=TRUE,SNP.DATA=TRUE)
#unlink("SNPRObjects", recursive=TRUE)
#return(res)
#}
###############END OF TETRAPLOID DATA
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.