gwasWeighted: Weighted Bonferroni Correction in GWASs

#
#	genomicRotation.R
#Mon Dec 17 15:45:47 CET 2018

#
#	<p> genomic rotation
#

# df: data.frame containing row-wise genomic information
# N: the shift number
genomicRotationIdcs = function(df, Nshift) {
	if (Nshift > nrow(df)) {
		#warning('Shift too large');
		Nshift = Nshift %% nrow(df);
	}
	#	print(Nshift);
	indeces = c(Seq(Nshift+1, nrow(df)), Seq(1, Nshift));
	return(indeces);
}
# columns: which columns to rotate
genomicRotation = function(df, Nshift, columns = 'P') {
	indeces = genomicRotationIdcs(df, Nshift);
	df[, columns] = df[indeces, columns, drop = F];
	df
}

# rotate by percentage
# perc: fraction of total SNP number by which to shift P-values (columns argument), rotation is performed
#	per chromosome
# orderBy: columns by which to order data frames first, needed to assure alignment of later aggregation
# columns: which columns to rotate
genomicRotationPerc = function(d, perc, columns = 'P', by = 'chr', orderBy = c('chr', 'pos')) {
	dO = d[order.df(d, orderBy), , drop = F];
	Nshift = round(perc * nrow(d));
	# rotation per chromosome
	idcsRot = by(dO, dO[[by]], function(d)genomicRotationIdcs(d, Nshift));
	# compute permutation
	Nrow = sapply(idcsRot, length);
	Ioff = pop(c(0, cumsum(Nrow)));
	idcsL = lapply(seq_along(Ioff), function(i)idcsRot[[i]] + Ioff[i]);
	idcs = unlist(lapply(seq_along(Ioff), function(i)idcsRot[[i]] + Ioff[i]));
	dO[, columns] = dO[idcs, columns, drop = F];
	return(dO);
}

# rotate several data.frames by percentage
genomicRotationsPerc = function(dfs, perc, columns = 'P', by = 'chr', orderBy = c('chr', 'pos')) {
	return(lapply(dfs, genomicRotationPerc, perc = perc, columns = columns, by = by, orderBy = orderBy));
}

# rotate several data.frames by different percentages, number of data.frames and percentages has to be the same
genomicRotationsPercs = function(dfs, percs, columns = 'P', by = 'chr', orderBy = c('chr', 'pos')) {
	return(lapply(seq_along(percs), function(i) {
		genomicRotationPerc(dfs[[i]], percs[i], by = by, orderBy = orderBy)
	}));
}

#
#	<p> data simulation
#

# Nstudies: total number of studies
# rotation: percentages of SNP positions between which to rotate
simulateNull = function(d0, Nstudies = 3, rotation = c(.004, 04), by = 'chr', orderBy = c('chr', 'pos')) {
	ds = rep(list(d0), Nstudies);
	rot = runif(Nstudies - 1, rotation[1], rotation[2]);
	dfs = genomicRotationsPercs(ds, c(0, rot), 'P', by = by, orderBy = orderBy);
	return(dfs);
}

modifyRegion = function(d0, region, rotation = c(0, 5), attenuation = .99, noise = .01, Nregion = 5e4) {
	Is = with(region, which(d0$chr == chr & d0$pos >= pos - Nregion & d0$pos <= pos + Nregion));
	d0R = d0[Is, , drop = F];
	Nsnps = nrow(d0R);
	rotA = runif(1, rotation[[1]]/Nsnps, rotation[[2]]/Nsnps);
	d0Rot = genomicRotationPerc(d0R, rotA, columns = 'P');
	Tmod = qchisq(d0Rot$P, 1, lower.tail = F) * attenuation * rlnorm(Nsnps, 0, noise);
	d0Rot$P = pchisq(Tmod, 1, lower.tail = F);
	return(list(Is = Is, data = d0Rot));
}

# Simulation idea: identify top-hits from target study, copy these signals to a number of discovery studies,
#	modify the signal by rotation, fixed attenuation, random pertubation
# rotationAlt: rotation of top-region after copying to discovery study, given in number of expected loci
# Ncopy: to how many discovery studies is a signal copied
# attenuation: fixed factor to attenuate signal from the target study on chisq scale
# noise: standard deviation of lognormal factor to scale signals before copying
simulateAltTopHits = function(d0, Nstudies = 3, rotation = c(.004, 04),
	Ptop = 2e-6, Nregion = 5e4,
	rotationAlt = c(1, 5), Ncopy = 1, attenuation = .95, noise = .05) {
	ds0 = simulateNull(d0, Nstudies, rotation);
	th = droplevels(snpsTopHitsFromTable(ds0[[1]], P ~ chr + pos, P = Ptop, Nregion = Nregion));
	for (i in Seq(1, nrow(th))) {
		for (j in 1:Ncopy) {
			d0Reg = modifyRegion(ds0[[1]], as.list(th[i, ]), rotationAlt, attenuation, noise, Nregion);
			ds0[[j + 1]][d0Reg$Is, ] = d0Reg$data;
		}
	}
	return(ds0);
}

#
#	<p> analysis
#

sumlog = function (p) {
	keep <- !is.na(p) & (p > 0) & (p <= 1);
	if (sum(keep) == 0) return(NA);
	if (sum(keep) == 1) return(list(p = p[keep], df = 2));
	lnp <- log(p[keep]);
	chisq <- (-2) * sum(lnp);
	df <- 2 * length(lnp);
	r = list(chisq = chisq, df = df, p = pchisq(chisq, df,  lower.tail = FALSE), validp = p[keep])
	return(r)
}

Sumlog = function(p)sumlog(p)$p

analyzeDataMeta = function(dfs, pars = list()) {
	Ps = do.call(cbind, list.kp(dfs, 'P'));
	Ina = which(apply(Ps, 1, function(p)any(is.na(p))));
	# <!> cave bias
	if (length(Ina) > 0) Ps = Ps[- Ina, , drop = F];
	Pmeta = apply(Ps, 1, Sumlog);
	r = list(Pmin = min(Pmeta, na.rm = T), TS = tailStrength(Pmeta), gini = Gini(Pmeta, na.rm = T));
	return(r);
}

analyzeDataWB = function(dfs, pars) {
	r = run_pipeline(dfs[[1]], dfs[-1], pars$power, pars$kernel, weightCombiner = 'min');
	return(r);
}

analyzeDataAll = function(dfs, pars) {
	r = list(analyzeDataMeta(dfs, pars), analyzeDataWB(dfs, pars));
	return(r);
}

#
#	<p> simulation runs
#

runSimulationSingle = function(d0, Nstudies = 3, rotation = c(.01, .05), ...) {
	ds = simulateNull(d0, Nstudies, rotation);
	r = analyzeDataAll(ds, ...);
	return(r);
}


runSimulationNull = function(d0, Nstudies = 3, Nrep = 10, rotation = c(.01, .05), ...) {
	r = lapply(Seq(1, Nrep), function(i) {
		runSimulationSingle(d0, Nstudies, rotation, ...)
	});
	return(r);
}

#Nstudies, rotation, rotationAlt, Ncopy, attenuation, noise
runSimulationAltSingle = function(d0, Nstudies = 3, rotation = c(.004, 04),
	Ptop = 2e-6, Nregion = 5e4,
	rotationAlt = c(1, 5), Ncopy = 1, attenuation = .95, noise = .05, ...) {

	ds = simulateAltTopHits(d0, Nstudies, rotation, Ptop, Nregion, rotationAlt, Ncopy, attenuation, noise);
	r = analyzeDataAll(ds, ...);
	return(r);
}

#
#	<p> data management
#

# in vector v, find index min j \in 1, ..., N so that v[1:j] contains at least U unique elements
uniqueIndex = function(v, U) {
	#Nu = sapply(seq_along(v), function(i)length(unique(data$chr[1:i])));
	# more efficient version
	u = c();
	for (i in seq_along(v)) {
		u = unique(c(u, v[i]));
		if (length(u) == U) return(i);
	}
	return(NA);
}

selectTopChrs = function(data, Nchrs = 3, fCols = P ~ chr + pos, P = 5e-4, Nregion = 5e4) {
	th = droplevels(snpsTopHitsFromTable(data, fCols, P = P, Nregion = Nregion));
	if (nrow(th) == 0) stop('no top hits identified');
	Ichr = uniqueIndex(th$chr, Nchrs);
	r = th[Seq(1, Ichr), , drop = F];
	return(r);
}

reduceDataSingle = function(d, Nchrs = 3, fCols = P ~ chr + pos, P = 5e-4, Nregion = 5e4, NselRange = NULL,
	chrs = NULL) {
	if (is.null(chrs)) chrs = selectTopChrs(d, Nchrs, fCols, P, Nregion);
	dSel = if (notE(NselRange)) {
		do.call(rbind, lapply(1:nrow(chrs), function(i) {
			subset(d, chr == chrs$chr[i] & pos >= chrs$pos[i] - NselRange & pos <= chrs$pos[i] + NselRange)
		}))
	} else merge(d, Df(chr = chrs$chr));
	Ina = which(is.na(dSel$P));
	if (length(Ina) > 0) dSel = dSel[-Ina, , drop = F];
	return(dSel);
}

# <!> names not homogenized
reduceData = function(ds, Nchrs = 3, fCols = P ~ chr + pos, P = 5e-4, Nregion = 5e4, NselRange = NULL) {
	chrs = selectTopChrs(ds[[1]], Nchrs, fCols, P, Nregion);
	dsSel = lapply(ds, function(d)reduceDataSingle(d, Nchrs, fCols, P, Nregion, NselRange, chrs));
	return(dsSel);
}

#
#	<p> parallel simulations null
#
dataColsDefault = c('chr', 'pos', 'marker', 'P');
simulateModelsWBNullScenarioSingle = function(Nstudies, pars, rotation, studyIndex, reduce) {
	LogS(2,
		'Nstudies: %{Nstudies}d, rotation: %{rot}s, studyIndex %{studyIndex}s',
		rot = join(rotation, ';')
	);
	path = parallelize_lookup(studyIndex);
	d = readTable(path, headerMap = list(posPhy = 'pos'))[dataColsDefault];
	d0 = do.call(reduceDataSingle, c(list(d), reduce));
	r = runSimulationSingle(d0, Nstudies, rotation, pars);
	return(r);
}
simulateModelsWBNullScenario = function(Nrep, Nstudies, pars, rotation, studyIndex, reduce) {
	r = Lapply(Seq(1, Nrep), function(i)
		simulateModelsWBNullScenarioSingle(Nstudies, pars, rotation, studyIndex, reduce)
	);
	return(r);
}


simulateModelsWBNull = function(modelList, parallel = F) {
	r = iterateModels(modelList, simulateModelsWBNullScenario, parallel = parallel);
	return(r);
}

#
#	<p> parallel simulations alt
#
simulateModelsWBAltScenarioSingle = function(Nstudies, pars, rotation, studyIndex, reduce,
	Ptop, Nregion, rotationAlt, Ncopy, attenuation, noise) {

	LogS(2,
		'Nstudies: %{Nstudies}d, rotation: %{rot}.2f, studyIndex %{studyIndex}d',
		rot = join('; ', rotation)
	);
	path = parallelize_lookup(studyIndex);
	d = readTable(path, headerMap = list(posPhy = 'pos'))[dataColsDefault];
	d0 = do.call(reduceDataSingle, c(list(d), reduce));
	r = runSimulationAltSingle(d0, Nstudies, rotation,
		Ptop, Nregion, rotationAlt, Ncopy, attenuation, noise, pars);
	return(r);
}
simulateModelsWBAltScenario = function(Nrep, Nstudies, pars, rotation, studyIndex, reduce,
	Ptop, Nregion, rotationAlt, Ncopy, attenuation, noise) {

	r = Lapply(Seq(1, Nrep), function(i)
		simulateModelsWBAltScenarioSingle(Nstudies, pars, rotation, studyIndex, reduce,
			Ptop, Nregion, rotationAlt, Ncopy, attenuation, noise)
	);
	return(r);
}


simulateModelsWBAlt = function(modelList, parallel = F) {
	r = iterateModels(modelList, simulateModelsWBAltScenario, parallel = parallel);
	return(r);
}

#
#	<p> summary
#
summaryMeta = function(r, PminData) {
	Pmin = list.kpu(r, '[[1]]$Pmin');
	Psupport = mean(Pmin < PminData, na.rm = T);
	return(Df(Analysis = 'meta', Psupport = Psupport));
}
summaryWB = function(r, PminData) {
	Pmin = filterList(list.kp(r, '[[2]]$[[1]]'), notE);
	PminA = aaply(laply(Pmin, as.matrix), c(2, 3), identity);
	Psupport = apply(PminA, 1:2, function(Pmin)mean(Pmin < PminData, na.rm = T));
	combs = merge.multi.list(as.list(dimnames(Psupport)));
	PsupportDf = apply(combs, 1, function(i)Psupport[i[1], i[2]]);
	combsChar = apply(combs, 1, function(i)Sprintf('E=%{E}d K=%{K}d', E = i[1], K = i[2]));
	return(Df(Analysis = combsChar, Psupport = PsupportDf));
}
summaryAll = function(r, PminData) {
	rbind(summaryMeta(r, PminData), summaryWB(r, PminData));
}
summaryPlot = function(sa, plotTitle = NULL) {
	pl = ggplot(data = sa, aes(x = Analysis, y = Psupport)) + geom_bar(stat="identity") +
		geom_hline(aes(yintercept = .5, color = 'red')) + xlab('Type of Analysis') +
		theme_bw() + theme(legend.position="none");
	if (notE(title)) pl = pl + ggtitle(plotTitle);
	return(pl);
}

studyData = function(modelList) {
	nlapply(modelList$studyIndex, function(n) {
		d = readTable(parallelize_lookup(n));
		return(d);
		#Pmin = min(d$P, na.rm = T);
		#return(list(P = Pmin));
	});
}

# ds = studyDescriptors(modelList)
summaryPlotsSave = function(r, ds,
	nameTemplate = 'results-sim/SimAlt-%{studyName}s-%{Ncopy}d-%{attenuation}d-%{noise}d.%{ext}s',
	plotType = 'pdf') {
	dTags = names(ds);
	plots = lapply(seq_along(r$results), function(i) {
		Istudy = r$models[i, 'studyIndex'];
		studyName = dTags[Istudy];
		summ = summaryAll(r$results[[i]], min(ds[[Istudy]]$P, na.rm = T));
		summL = list(summary = summ, minPdata = min(ds[[Istudy]]$P));
		pathSummary = with(as.list(r$models[i, ]), Sprintf(nameTemplate, ext = 'Rdata'));
		save(summL, file = pathSummary);
		summaryPlot(summ, plotTitle = Sprintf('Study %{studyName}s'))
	});
	ilapply(plots, function(pl, i) {
		studyName = dTags[r$models[i, 'studyIndex']];
		path = with(as.list(r$models[i, ]), Sprintf(nameTemplate, ext = plotType));
		plot_save(pl, plot_path = path)
	});
	return(plots);
}
sboehringer/gwasWeighted documentation built on Dec. 22, 2021, 10:19 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
sboehringer/gwasWeighted
Weighted Bonferroni Correction in GWASs

R/genomicRotation.R
In sboehringer/gwasWeighted: Weighted Bonferroni Correction in GWASs

R Package Documentation

Browse R Packages

We want your feedback!

sboehringer/gwasWeighted Weighted Bonferroni Correction in GWASs

R/genomicRotation.R In sboehringer/gwasWeighted: Weighted Bonferroni Correction in GWASs

R Package Documentation

Browse R Packages

We want your feedback!

sboehringer/gwasWeighted
Weighted Bonferroni Correction in GWASs

R/genomicRotation.R
In sboehringer/gwasWeighted: Weighted Bonferroni Correction in GWASs