# R/plinkcollr.R

# Defines functions: plinkcollr

# Documented in: plinkcollr

#' Apply plinkr to multiple files
#'
#' Runs the \code{plink} executable once for every entry of
#' \code{plinkcollFileStems}, passing that entry as both \code{--bfile} and
#' \code{--out}, followed by every PLINK option that was supplied (i.e. is not
#' \code{NULL}). After each successful synchronous run the corresponding
#' \code{.bed} file is deleted.
#'
#' All the PLINK parameters of this function are documented in
#' \code{\link{getPlinkParam}}. Underscores in argument names are turned into
#' dashes to form the PLINK flag (\code{make_bed} becomes \code{--make-bed});
#' \code{one} maps to \code{--1} and \code{twothreefile} to \code{--23file}.
#' A parameter holding several values is emitted as one flag followed by all of
#' its values. The \code{out} and \code{bfile} options must not be supplied,
#' because they are set per file.
#'
#' @param plinkcollFileStems Character vector of file stems, or of paths ending
#'   in \code{.bed} (the extension is then stripped with \code{getstem}).
#' @param wait Passed on to \code{\link{system2}}. When it is not \code{TRUE}
#'   the \code{.bed} file is left in place, since PLINK may still be reading it.
#' @param stdout,stderr Accepted, but currently not forwarded to
#'   \code{\link{system2}}: PLINK output is always discarded.
#' @return \code{NULL}, invisibly. Called for its side effects.
#'
#' @import stringr
#' @export
#' @seealso \code{\link{getPlinkParam}} \code{\link{plinkr}}
#'
plinkcollr = function(
		D=NULL,
		K=NULL,
		a1_allele=NULL,
		a2_allele=NULL,
		adjust=NULL,
		all=NULL,
		all_pheno=NULL,
		allele1234=NULL,
		alleleACGT=NULL,
		allele_count=NULL,
		allow_extra_chr=NULL,
		allow_no_sex=NULL,
		alt_group=NULL,
		alt_snp=NULL,
		annotate=NULL,
		annotate_snp_field=NULL,
		aperm=NULL,
		assoc=NULL,
		attrib=NULL,
		attrib_indiv=NULL,
		autosome=NULL,
		autosome_num=NULL,
		autosome_xy=NULL,
		bcf=NULL,
		bd=NULL,
		bed=NULL,
		beta=NULL,
		bfile=NULL,
		bgen=NULL,
		biallelic_only=NULL,
		bim=NULL,
		blocks=NULL,
		blocks_inform_frac=NULL,
		blocks_max_kb=NULL,
		blocks_min_maf=NULL,
		blocks_recomb_highci=NULL,
		blocks_strong_highci=NULL,
		blocks_strong_lowci=NULL,
		bmerge=NULL,
		border=NULL,
		bp_space=NULL,
		case_only=NULL,
		cc=NULL,
		cell=NULL,
		cfile=NULL,
		chap=NULL,
		check_sex=NULL,
		chr=NULL,
		chr_set=NULL,
		ci=NULL,
		clump=NULL,
		clump_allow_overlap=NULL,
		clump_annotate=NULL,
		clump_best=NULL,
		clump_field=NULL,
		clump_index_first=NULL,
		clump_kb=NULL,
		clump_p1=NULL,
		clump_p2=NULL,
		clump_r2=NULL,
		clump_range=NULL,
		clump_range_border=NULL,
		clump_replicate=NULL,
		clump_snp_field=NULL,
		clump_verbose=NULL,
		cluster=NULL,
		cluster_missing=NULL,
		cm_map=NULL,
		cnv_blue=NULL,
		cnv_border=NULL,
		cnv_brown=NULL,
		cnv_check_no_overlap=NULL,
		cnv_count=NULL,
		cnv_del=NULL,
		cnv_disrupt=NULL,
		cnv_drop_no_segment=NULL,
		cnv_dup=NULL,
		cnv_enrichment_test=NULL,
		cnv_exclude=NULL,
		cnv_exclude_off_by_1=NULL,
		cnv_freq_excldue_above=NULL,
		cnv_freq_excldue_below=NULL,
		cnv_freq_excldue_exact=NULL,
		cnv_freq_exclude_above=NULL,
		cnv_freq_exclude_below=NULL,
		cnv_freq_exclude_exact=NULL,
		cnv_freq_incldue_exact=NULL,
		cnv_freq_include_exact=NULL,
		cnv_freq_method2=NULL,
		cnv_freq_overlap=NULL,
		cnv_green=NULL,
		cnv_indiv_perm=NULL,
		cnv_intersect=NULL,
		cnv_kb=NULL,
		cnv_list=NULL,
		cnv_make_map=NULL,
		cnv_max_kb=NULL,
		cnv_max_score=NULL,
		cnv_max_sites=NULL,
		cnv_overlap=NULL,
		cnv_red=NULL,
		cnv_region_overlap=NULL,
		cnv_report_regions=NULL,
		cnv_score=NULL,
		cnv_seglist=NULL,
		cnv_sites=NULL,
		cnv_subset=NULL,
		cnv_test=NULL,
		cnv_test_1sided=NULL,
		cnv_test_2sided=NULL,
		cnv_test_region=NULL,
		cnv_test_window=NULL,
		cnv_track=NULL,
		cnv_union_overlap=NULL,
		cnv_unique=NULL,
		cnv_verbose_report_regions=NULL,
		cnv_write=NULL,
		cnv_write_freq=NULL,
		complement_sets=NULL,
		compound_genotypes=NULL,
		compress=NULL,
		condition=NULL,
		condition_list=NULL,
		consensus_match=NULL,
		const_fid=NULL,
		control=NULL,
		counts=NULL,
		covar=NULL,
		covar_name=NULL,
		covar_number=NULL,
		cow=NULL,
		d=NULL,
		data=NULL,
		debug=NULL,
		decompress=NULL,
		dfam=NULL,
		distance=NULL,
		distance_exp=NULL,
		distance_matrix=NULL,
		dog=NULL,
		dominant=NULL,
		dosage=NULL,
		double_id=NULL,
		dprime=NULL,
		dummy=NULL,
		dummy_coding=NULL,
		each_versus_others=NULL,
		each_vs_others=NULL,
		epistasis=NULL,
		epistasis_summary_merge=NULL,
		exclude=NULL,
		exclude_before_extract=NULL,
		exclude_snp=NULL,
		exclude_snps=NULL,
		extract=NULL,
		fam=NULL,
		family=NULL,
		fast_epistasis=NULL,
		fid=NULL,
		file=NULL,
		fill_missing_a2=NULL,
		filter=NULL,
		filter_cases=NULL,
		filter_controls=NULL,
		filter_females=NULL,
		filter_founders=NULL,
		filter_males=NULL,
		filter_nonfounders=NULL,
		fisher=NULL,
		flip=NULL,
		flip_scan=NULL,
		flip_scan_threshold=NULL,
		flip_scan_verbose=NULL,
		flip_scan_window=NULL,
		flip_scan_window_kb=NULL,
		flip_subset=NULL,
		freq=NULL,
		freqx=NULL,
		from=NULL,
		from_bp=NULL,
		from_kb=NULL,
		from_mb=NULL,
		frqx=NULL,
		fst=NULL,
		gap=NULL,
		gates=NULL,
		gc=NULL,
		gen=NULL,
		gene=NULL,
		gene_all=NULL,
		gene_list=NULL,
		gene_list_border=NULL,
		gene_report=NULL,
		gene_report_empty=NULL,
		gene_report_snp_field=NULL,
		gene_subset=NULL,
		genedrop=NULL,
		genepi=NULL,
		geno=NULL,
		genome=NULL,
		genome_full=NULL,
		genome_lists=NULL,
		genome_minimal=NULL,
		genotypic=NULL,
		gfile=NULL,
		gplink=NULL,
		grm=NULL,
		grm_bin=NULL,
		grm_gz=NULL,
		group_avg=NULL,
		groupdist=NULL,
		gxe=NULL,
		hap...=NULL,
		hap=NULL,
		hap_assoc=NULL,
		hap_freq=NULL,
		hap_impute=NULL,
		hap_max_phase=NULL,
		hap_min_phase_prob=NULL,
		hap_miss=NULL,
		hap_phase=NULL,
		hap_phase_wide=NULL,
		hap_pp=NULL,
		hap_snps=NULL,
		hap_tdt=NULL,
		hap_window=NULL,
		hard_call_threshold=NULL,
		hardy2=NULL,
		hardy=NULL,
		help=NULL,
		het=NULL,
		hethom=NULL,
		hide_covar=NULL,
		homog=NULL,
		homozyg=NULL,
		homozyg_density=NULL,
		homozyg_gap=NULL,
		homozyg_group=NULL,
		homozyg_het=NULL,
		homozyg_include_missing=NULL,
		homozyg_kb=NULL,
		homozyg_match=NULL,
		homozyg_snp=NULL,
		homozyg_verbose=NULL,
		homozyg_window_het=NULL,
		homozyg_window_kb=NULL,
		homozyg_window_missing=NULL,
		homozyg_window_snp=NULL,
		homozyg_window_threshold=NULL,
		horse=NULL,
		hwe=NULL,
		hwe_all=NULL,
		ibc=NULL,
		ibm=NULL,
		ibs_matrix=NULL,
		ibs_test=NULL,
		id_delim=NULL,
		id_dict=NULL,
		id_match=NULL,
		iid=NULL,
		impossible=NULL,
		impute_sex=NULL,
		ind_major=NULL,
		indep=NULL,
		indep_pairphase=NULL,
		indep_pairwise=NULL,
		independent_effect=NULL,
		indiv_sort=NULL,
		inter_chr=NULL,
		interaction=NULL,
		je_cellmin=NULL,
		keep=NULL,
		keep_allele_order=NULL,
		keep_autoconv=NULL,
		keep_before_remove=NULL,
		keep_cluster_names=NULL,
		keep_clusters=NULL,
		keep_fam=NULL,
		lambda=NULL,
		lasso=NULL,
		lasso_select_covars=NULL,
		ld=NULL,
		ld_snp=NULL,
		ld_snp_list=NULL,
		ld_snps=NULL,
		ld_window=NULL,
		ld_window_kb=NULL,
		ld_window_r2=NULL,
		ld_xchr=NULL,
		lfile=NULL,
		liability=NULL,
		linear=NULL,
		list=NULL,
		list_23_indels=NULL,
		list_all=NULL,
		logistic=NULL,
		lookup...=NULL,
		lookup=NULL,
		lookup_gene=NULL,
		lookup_list=NULL,
		loop_assoc=NULL,
		maf=NULL,
		maf_succ=NULL,
		make_bed=NULL,
		make_founders=NULL,
		make_grm=NULL,
		make_grm_bin=NULL,
		make_grm_gz=NULL,
		make_just_bim=NULL,
		make_just_fam=NULL,
		make_perm_pheno=NULL,
		make_pheno=NULL,
		make_rel=NULL,
		make_set=NULL,
		make_set_border=NULL,
		make_set_collapse_group=NULL,
		make_set_complement_all=NULL,
		make_set_complement_group=NULL,
		map=NULL,
		mat=NULL,
		match=NULL,
		match_type=NULL,
		matrix=NULL,
		max=NULL,
		max_maf=NULL,
		mc=NULL,
		mcc=NULL,
		mcovar=NULL,
		mds_cluster=NULL,
		mds_plot=NULL,
		me=NULL,
		me_exclude_one=NULL,
		memory=NULL,
		mendel=NULL,
		mendel_duos=NULL,
		mendel_multigen=NULL,
		merge=NULL,
		merge_equal_pos=NULL,
		merge_list=NULL,
		merge_mode=NULL,
		merge_x=NULL,
		meta_analysis=NULL,
		meta_analysis_..._field=NULL,
		mfilter=NULL,
		mh=NULL,
		mhf=NULL,
		min=NULL,
		mind=NULL,
		mishap_window=NULL,
		missing=NULL,
		missing_code=NULL,
		missing_genotype=NULL,
		missing_phenotype=NULL,
		missing_var_code=NULL,
		mlma=NULL,
		mlma_loco=NULL,
		mlma_no_adj_covar=NULL,
		model=NULL,
		model_dom=NULL,
		model_gen=NULL,
		model_rec=NULL,
		model_trend=NULL,
		mouse=NULL,
		mperm=NULL,
		mperm_save=NULL,
		mperm_save_all=NULL,
		mpheno=NULL,
		must_have_sex=NULL,
		mwithin=NULL,
		neighbour=NULL,
		no_fid=NULL,
		no_parents=NULL,
		no_pheno=NULL,
		no_sex=NULL,
		no_snp=NULL,
		no_x_sex=NULL,
		nonfounders=NULL,
		nop=NULL,
		not_chr=NULL,
		nudge=NULL,
		null_group=NULL,
		null_snp=NULL,
		oblig_cluster=NULL,
		oblig_clusters=NULL,
		oblig_missing=NULL,
		out=NULL,
		output_chr=NULL,
		output_missing_genotype=NULL,
		output_missing_phenotype=NULL,
		oxford_pheno_name=NULL,
		parallel=NULL,
		parameters=NULL,
		parentdt1=NULL,
		parentdt2=NULL,
		pat=NULL,
		pca=NULL,
		pca_cluster_names=NULL,
		pca_clusters=NULL,
		ped=NULL,
		pedigree=NULL,
		perm=NULL,
		perm_batch_size=NULL,
		perm_count=NULL,
		pfilter=NULL,
		pheno=NULL,
		pheno_merge=NULL,
		pheno_name=NULL,
		pick1=NULL,
		plist=NULL,
		poo=NULL,
		pool_size=NULL,
		ppc=NULL,
		ppc_gap=NULL,
		proxy_...=NULL,
		proxy_assoc=NULL,
		proxy_b_kb=NULL,
		proxy_b_maxsnp=NULL,
		proxy_b_r2=NULL,
		proxy_b_threshold=NULL,
		proxy_b_window=NULL,
		proxy_dosage=NULL,
		proxy_drop=NULL,
		proxy_flanking=NULL,
		proxy_geno=NULL,
		proxy_genotypic_concordance=NULL,
		proxy_glm=NULL,
		proxy_impute=NULL,
		proxy_impute_threshold=NULL,
		proxy_kb=NULL,
		proxy_list=NULL,
		proxy_maf=NULL,
		proxy_maxsnp=NULL,
		proxy_mhf=NULL,
		proxy_r2=NULL,
		proxy_r2_no_filter=NULL,
		proxy_replace=NULL,
		proxy_show_proxies=NULL,
		proxy_sub_maxsnp=NULL,
		proxy_sub_r2=NULL,
		proxy_tdt=NULL,
		proxy_verbose=NULL,
		proxy_window=NULL,
		prune=NULL,
		q_score_file=NULL,
		q_score_range=NULL,
		qfam...=NULL,
		qmatch=NULL,
		qq_plot=NULL,
		qt=NULL,
		qt_means=NULL,
		qual_geno_...=NULL,
		qual_geno_max_threshold=NULL,
		qual_geno_scores=NULL,
		qual_geno_threshold=NULL,
		qual_max_threshold=NULL,
		qual_scores=NULL,
		qual_threshold=NULL,
		r2=NULL,
		r=NULL,
		range=NULL,
		rank=NULL,
		read_dists=NULL,
		read_freq=NULL,
		read_genome=NULL,
		read_genome_list=NULL,
		read_genome_minimal=NULL,
		recessive=NULL,
		recode12=NULL,
		recode=NULL,
		recodeA=NULL,
		recodeAD=NULL,
		recodeHV=NULL,
		recode_allele=NULL,
		recode_beagle=NULL,
		recode_bimbam=NULL,
		recode_fastphase=NULL,
		recode_lgen=NULL,
		recode_rlist=NULL,
		recode_structure=NULL,
		recode_vcf=NULL,
		recode_whap=NULL,
		reference=NULL,
		reference_allele=NULL,
		regress_distance=NULL,
		regress_pcs=NULL,
		regress_rel=NULL,
		rel_check=NULL,
		rel_cutoff=NULL,
		remove=NULL,
		remove_cluster_names=NULL,
		remove_clusters=NULL,
		remove_fam=NULL,
		rerun=NULL,
		rice=NULL,
		sample=NULL,
		score=NULL,
		score_no_mean_imputation=NULL,
		script=NULL,
		seed=NULL,
		set=NULL,
		set_by_all=NULL,
		set_collapse_all=NULL,
		set_hh_missing=NULL,
		set_max=NULL,
		set_me_missing=NULL,
		set_missing_nonsnp_ids=NULL,
		set_missing_snp_ids=NULL,
		set_missing_var_ids=NULL,
		set_names=NULL,
		set_p=NULL,
		set_r2=NULL,
		set_r2_phase=NULL,
		set_table=NULL,
		set_test=NULL,
		sex=NULL,
		sheep=NULL,
		show_tags=NULL,
		silent=NULL,
		simulate=NULL,
		simulate_haps=NULL,
		simulate_label=NULL,
		simulate_missing=NULL,
		simulate_n=NULL,
		simulate_ncases=NULL,
		simulate_ncontrols=NULL,
		simulate_prevalence=NULL,
		simulate_qt=NULL,
		simulate_tags=NULL,
		snp=NULL,
		snps=NULL,
		snps_only=NULL,
		specific_haplotype=NULL,
		split_x=NULL,
		standard_beta=NULL,
		subset=NULL,
		swap_parents=NULL,
		swap_sibs=NULL,
		swap_unrel=NULL,
		tab=NULL,
		tag_kb=NULL,
		tag_mode2=NULL,
		tag_r2=NULL,
		tail_pheno=NULL,
		tdt=NULL,
		test_all=NULL,
		test_mishap=NULL,
		test_missing=NULL,
		test_snp=NULL,
		tests=NULL,
		tfam=NULL,
		tfile=NULL,
		thin=NULL,
		thin_count=NULL,
		threads=NULL,
		to=NULL,
		to_bp=NULL,
		to_kb=NULL,
		to_mb=NULL,
		tped=NULL,
		transpose=NULL,
		trend=NULL,
		tucc=NULL,
		twolocus=NULL,
		unbounded=NULL,
		unrelated_heritability=NULL,
		update_alleles=NULL,
		update_chr=NULL,
		update_cm=NULL,
		update_ids=NULL,
		update_map=NULL,
		update_name=NULL,
		update_parents=NULL,
		update_sex=NULL,
		vcf=NULL,
		vcf_filter=NULL,
		vcf_half_call=NULL,
		vcf_idspace_to=NULL,
		vcf_min_qual=NULL,
		vegas=NULL,
		version=NULL,
		vif=NULL,
		whap=NULL,
		window=NULL,
		with_freqs=NULL,
		with_phenotype=NULL,
		with_reference=NULL,
		within=NULL,
		write_cluster=NULL,
		write_covar=NULL,
		write_dosage=NULL,
		write_set=NULL,
		write_set_r2=NULL,
		write_snplist=NULL,
		xchr_model=NULL,
		zero_cluster=NULL,
		zero_cms=NULL,
		one = NULL,
		twothreefile = NULL,
		plinkcollFileStems=NULL,
		wait=TRUE,
		stdout="",
		stderr=""
) {
	# Snapshot every formal argument (PLINK options and control arguments alike)
	# as a named list; arguments left at their default stay NULL.
	thisEnv = environment()
	paramList = mget(names(formals()), envir = thisEnv)

	# Control arguments are not PLINK options, so take them out of the list.
	# NOTE(review): stdout/stderr are accepted but never forwarded -- the
	# system2() call below always discards PLINK output. Confirm whether that is
	# intended before wiring them through.
	paramList$wait = NULL
	paramList$stdout = NULL
	paramList$stderr = NULL
	paramList$plinkcollFileStems = NULL

	if(is.null(plinkcollFileStems)) stop("You must give a vector of bed files to apply plink upon")

	# Keep only the options the caller actually supplied.
	plinkParams = Filter(Negate(is.null), paramList)

	# Argument name -> PLINK flag. sprintf() (unlike paste()) returns a
	# zero-length vector for zero-length input, so "no options" stays empty
	# instead of turning into a stray "--".
	flagNames = sprintf("--%s", gsub("_", "-", names(plinkParams), fixed = TRUE))
	# R identifiers cannot start with a digit, hence the spelled-out aliases.
	flagNames[flagNames == "--one"] = "--1"
	flagNames[flagNames == "--twothreefile"] = "--23file"

	# --out and --bfile are filled in per file below.
	if("--out" %in% flagNames) stop("The 'out' option shouldn't be used in batch mode")
	if("--bfile" %in% flagNames) stop("The 'bfile' option shouldn't be used in batch mode")

	# Build "flag value [value ...]" per option. Going option by option (rather
	# than unlist()-ing the whole list) keeps a multi-valued option under a
	# single flag; unlist() would rename chr=c(1, 2) to "chr1" and "chr2".
	argChunks = lapply(seq_along(plinkParams), function(i) {
		c(flagNames[i], trimws(as.character(plinkParams[[i]])))
	})
	plinkArgs = as.character(unlist(argChunks, use.names = FALSE))

	# Entries given as paths to .bed files are reduced to their stems.
	# getstem() is defined elsewhere in the package; presumably it strips the
	# file extension and is vectorised -- TODO confirm.
	isBed = grepl("\\.bed$", plinkcollFileStems)
	if(any(isBed)) {
		plinkcollFileStems[isBed] = getstem(plinkcollFileStems[isBed])
	}

	for(bedFile in plinkcollFileStems) {
		message(sprintf("Running plink on %s...", bedFile))
		localParam = c("--bfile", bedFile, "--out", bedFile, plinkArgs)
		ret = system2("plink", localParam, wait=wait, stdout=NULL, stderr=NULL)
		if(ret != 0) stop("PLINK failed.")
		if(isTRUE(wait)) {
			message(sprintf("Removing %s...", bedFile))
			file.remove(sprintf("%s.bed", bedFile))
		} else {
			# PLINK was only launched, not awaited: deleting its input now
			# would race with the running process.
			message(sprintf("Not removing %s.bed: PLINK was started with wait=FALSE and may still be reading it.", bedFile))
		}
	}
	cat("\n")
	invisible(NULL)
}
# kindlychung/CollapsABEL documentation built on May 20, 2019, 9:57 a.m.