R/pre2.remove.genos.batch.R
In genMOSSplus: Application of MOSS algorithm to genome-wide association study (GWAS)

pre2.remove.genos.batch <- function(dir.dat, dir.ped=dir.dat, dir.out, dir.warning=dir.out, perc.snp=10, perc.patient=20, empty="0/0", num.nonsnp.col=5, prefix.dat, prefix.case, prefix.control, key.dat="", key.case="CASE", key.control="CONTROL", ending.dat=".dat", ending.case=".ped", ending.control=".ped") {
	# Batch driver for pre2.remove.genos(): runs it once per chromosome for which
	# a .dat file, a CASE .ped file and a CONTROL .ped file are all available.
	# (Per the original notes: removes SNPs that contain more than perc.snp
	# empty geno values from the corresponding .ped and .dat files.)
	#
	# Example:
	# pre2.remove.genos.batch("/home/briollaislab/olga/curr/data/mach1in/hisformat", dir.out="/home/briollaislab/olga/curr/data/mach1in", dir.warning="/home/briollaislab/olga/curr/data/mach1in/warnings", empty="0/0")
	#
	# Arguments:
	# dir.dat     - directory searched for the .dat files (required).
	# dir.ped     - directory searched for the CASE/CONTROL .ped files;
	#               defaults to dir.dat.
	#    - If the .ped files of a chromosome are split over several files, they
	#      are concatenated by combine.same.chrom() before processing.
	#    - Chromosomes lacking any of the three file kinds are skipped.
	#    - Per the original notes, .ped and .dat files must be tab separated.
	# dir.out     - output directory (required), forwarded to pre2.remove.genos().
	# dir.warning - forwarded to pre2.remove.genos(); defaults to dir.out.
	# perc.snp, perc.patient, empty, num.nonsnp.col
	#             - forwarded unchanged to pre2.remove.genos().
	# prefix.dat, prefix.case, prefix.control  - file name prefixes (required).
	# key.dat, key.case, key.control           - keys handed to get.file.name().
	# ending.dat, ending.case, ending.control  - file name endings.
	#
	# Relies on get.file.name(), get.chrom.num(), pre2.remove.genos() and
	# combine.same.chrom().
	#
	# Returns NULL (visibly when no input files are found, invisibly otherwise).

	# ---- Argument checks ----
	if (missing(dir.dat)) {
		stop("Name of input directory for .dat files must be provided.")
	}
	if (missing(dir.out)) {
		stop("Name of output directory must be provided.")
	}
	if (missing(prefix.dat)) {
		stop("Prefix of the .dat file name must be provided.")
	}
	if (missing(prefix.case)) {
		stop("Prefix of the CASE .ped file name must be provided.")
	}
	if (missing(prefix.control)) {
		stop("Prefix of the CONTROL .ped file name must be provided.")
	}

	# ---- Step 1: list the candidate .dat, CASE and CONTROL files ----
	dat.files <- get.file.name(dir=dir.dat, prefix=prefix.dat, key=key.dat, ending=ending.dat)
	case.files <- get.file.name(dir=dir.ped, prefix=prefix.case, key=key.case, ending=ending.case)
	control.files <- get.file.name(dir=dir.ped, prefix=prefix.control, key=key.control, ending=ending.control)

	# Nothing to do unless all three kinds of files were found.
	have.all.kinds <- length(dat.files) > 0 && length(case.files) > 0 && length(control.files) > 0
	if (!have.all.kinds) {
		return()
	}

	# ---- Step 2: merge split .ped files so each chromosome has one file ----
	case.merged <- combine.same.chrom(dir.in=dir.ped, file.name=case.files, prefix=prefix.case, ending=ending.case)
	control.merged <- combine.same.chrom(dir.in=dir.ped, file.name=control.files, prefix=prefix.control, ending=ending.control)

	# ---- Step 3: pair files by chromosome and process each chromosome ----
	case.chroms <- get.chrom.num(case.merged, prefix=prefix.case)
	control.chroms <- get.chrom.num(control.merged, prefix=prefix.control)
	dat.chroms <- get.chrom.num(dat.files, prefix=prefix.dat)

	# Only chromosomes present in all three sets can be processed.
	shared.chroms <- intersect(intersect(case.chroms, control.chroms), dat.chroms)

	# First matching file of each kind for every shared chromosome.
	dat.pick <- dat.files[match(shared.chroms, dat.chroms)]
	case.pick <- case.merged[match(shared.chroms, case.chroms)]
	control.pick <- control.merged[match(shared.chroms, control.chroms)]

	# NOTE(review): dir.ped is not forwarded here; confirm that
	# pre2.remove.genos() locates the .ped files when dir.ped differs from dir.dat.
	for (k in seq_along(shared.chroms)) {
		pre2.remove.genos(file.dat=dat.pick[k], case.ped=case.pick[k], control.ped=control.pick[k], dir.dat=dir.dat, dir.out=dir.out, dir.warning=dir.warning, perc.snp=perc.snp, perc.patient=perc.patient, empty=empty, num.nonsnp.col=num.nonsnp.col)
	}

}



# Helper function to combine multiple files of the same chromosome into one.
#
# Arguments:
# dir.in    - directory that contains the files listed in file.name.
# file.name - character vector of file names (without directory part).
# prefix    - file name prefix, handed to get.chrom.num().
# ending    - file name ending (e.g. ".ped"); used to build and to recognise
#             the name of a combined file, "<name>.all<ending>".
#
# Returns a character vector with one file name per distinct chromosome id
# reported by get.chrom.num(): the original name when the chromosome has a
# single file, otherwise the name of the combined file.
#
# Side effect: when a chromosome is stored in more than one file and the
# combined file does not exist yet, the files are concatenated (in the order
# given in file.name) into the combined file inside dir.in.
combine.same.chrom <- function(dir.in, file.name, prefix, ending) {

	chroms <- get.chrom.num(file.name, prefix=prefix)
	uniq.shred <- unique(chroms)
	result.names <- rep("", length(uniq.shred))

	# Literal suffix that marks an already combined file.
	all.suffix <- paste(".all", ending, sep="")

	for (i in seq_along(uniq.shred)) {
		shred.id <- which(chroms == uniq.shred[i])
		shred.same <- file.name[shred.id]
		shred.name <- shred.same[1]

		# Default combined name: first file's name with ".all" inserted before the ending.
		new.name <- paste(substr(shred.name, 1, nchar(shred.name) - nchar(ending)), all.suffix, sep="")

		# If one of this chromosome's files already is a combined file, reuse it.
		# The suffix is compared literally (no regex, so "." is not a wildcard)
		# and the hit is looked up within this chromosome's files only.
		name.len <- nchar(shred.same)
		is.all <- substr(shred.same, name.len - nchar(all.suffix) + 1, name.len) == all.suffix
		if (any(is.all))
			new.name <- shred.same[which(is.all)[1]]

		new.name.path <- file.path(dir.in, new.name)

		# Save the names of useful files to return later.
		if (length(shred.id) == 1)
			result.names[i] <- shred.name
		else
			result.names[i] <- new.name

		# Combine files for a chromosome if it is saved in more than 1 file.
		if (length(shred.id) > 1 && !file.exists(new.name.path)) {
			print(paste("Combining into: ", new.name, sep=""))
			# Concatenate inside R rather than through a shell "cat" command:
			# this works for paths with spaces and on platforms without "cat".
			combined <- file.create(new.name.path) &&
				all(file.append(new.name.path, file.path(dir.in, shred.same)))
			if (!combined)
				warning(paste("Failed to combine files into: ", new.name.path, sep=""))
		}
	}

	return(result.names)

}

Any scripts or data that you put into this service are public.

genMOSSplus documentation built on May 1, 2019, 10:31 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

genMOSSplus
Application of MOSS algorithm to genome-wide association study (GWAS)

R/pre2.remove.genos.batch.R
In genMOSSplus: Application of MOSS algorithm to genome-wide association study (GWAS)

Try the genMOSSplus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

genMOSSplus Application of MOSS algorithm to genome-wide association study (GWAS)

R/pre2.remove.genos.batch.R In genMOSSplus: Application of MOSS algorithm to genome-wide association study (GWAS)

Try the genMOSSplus package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

genMOSSplus
Application of MOSS algorithm to genome-wide association study (GWAS)

R/pre2.remove.genos.batch.R
In genMOSSplus: Application of MOSS algorithm to genome-wide association study (GWAS)