fastman <- function(m, chr = "CHR", bp = "BP", p = "P", snp, chrlabs, speedup=TRUE, logp = TRUE, col="matlab", maxP=14, sortchr=TRUE, bybp=FALSE, chrsubset, bprange,
highlight, annotateHighlight=FALSE, annotatePval, colAbovePval=FALSE, col2="greys", annotateTop=TRUE, annotationWinMb, annotateN, annotationCol, annotationAngle=45,
baseline=NULL, suggestiveline, genomewideline, cex=0.4, cex.text=0.4, cex.axis=0.6, xlab, ylab, xlim, ylim, ...) {
# use: source("fastman.R");
# example: tic(); fastman(m); toc();
# on a typical imputed assoc file, 10 million snps reduced to 170K, plotting time reduced to 737s in qqman to 60s.
# part 0: initialize parameters --------------------------------------------------------------------------------------------------------------------------------------------
if (missing(suggestiveline)) { suggestiveline=NULL; if (logp) { suggestiveline=-log10(1e-05); } } # if suggestiveline is not provided by user then 5 is set as default value
if (missing(genomewideline)) { genomewideline=NULL; if (logp) { genomewideline=-log10(5e-08); } } # if genomewideline is not provided by user then 8 is set as default value
if (missing(annotationCol)) { annotationCol="gray50"; if (col=="greys") { annotationCol="green4"; } } # if annotationCol is not provided by user then grey is set as default
if (!(annotationCol %in% colnames(m))) { m$annotationCol <- annotationCol; annotationCol <- "annotationCol"; } # if annotationCol is not a column in the data set then create a column with all entries taking the value of annotationCol provided # memory 9.7
if (annotationAngle<0|annotationAngle>90) { warning("Text orientation angle should be between 0 & 90 degrees. reverting to default."); annotationAngle=45; } # if annotationAngle is not within 0 and 90 then it is set to 45
if (!missing(annotatePval)) { if (!is.numeric(annotatePval)) { stop("Non-numeric value supplied to annotatePval."); } } # if non-numeric annotatePval is provided by user then the code gives error message
if (!missing(annotatePval)) { if (length(annotatePval)>1) { warning("Vector value supplied to annotatePval. Only first element has been considered."); annotatePval=annotatePval[1] } } # if vector annotatePval is provided by user then the code gives warning message and picks only first element
if (!missing(annotationWinMb)) {
if (!is.numeric(annotationWinMb)) { stop("Non-numeric value supplied to annotationWinMb."); } # if non-numeric annotationWinMb is provided by user then the code gives error message
if (annotationWinMb<0) { stop("Negative value supplied to annotationWinMb."); } # if negative annotationWinMb is provided by user then the code gives error message
}
if (!missing(annotateN)) {
if(!is.numeric(annotateN)) { stop("Non-positive value supplied to annotateN."); } # if non-numeric annotationN is provided by user then the code gives error message
if (annotateN<1) { stop("Too small value supplied to annotateN."); } # if less than 1 input for annotationN is provided by user then the code gives error message
}
if (!missing(annotatePval)&logp) { if (annotatePval<=0) {stop("Non-positive value supplied to annotatePval with log transformation selected."); }; if (annotatePval<1) {annotatePval=-log10(annotatePval); }; } # if non-positive annotatePval is provided by user then the code gives error message
if (!missing(highlight)) { highlight=highlight[!is.na(highlight)]; }
if (nrow(m)==0) { stop("Input data has 0 rows, cannot be plotted!"); } # check if data has actual rows
if (!(chr %in% colnames(m))) { stop(paste("Column", chr, "not found!")); } # check if chr column is provided
if (!(bp %in% colnames(m))) { stop(paste("Column", bp, "not found!")); } # check if bp column is provided
if (!(p %in% colnames(m))) { stop(paste("Column", p, "not found!")); } # check if p column is provided
if (!missing(chrsubset)) { if (!is.numeric(chrsubset)) { stop("Non-numeric value supplied to chrsubset."); } } # if non-numeric chrsubset is provided by user then code gives error message
if (!missing(bprange)) { # range of BP provided, to subset the data
if (!is.numeric(bprange)) { stop("Supplied BP range contains non-numeric values."); } # if non-numeric bprange is provided by user then code gives error message
if (length(bprange)!=2) { stop("Supplied BP range must contain 2 values, start and end of range."); } # if both start and end of bprange are not provided by user then code gives error message
if (bprange[2]<bprange[1]) { stop("Supplied BP range end point cannot be less than the start point."); } # if end point is less than the start point then code gives error message
bybp=TRUE; # if BP range supplied, then plotting must be by position
}
# check if snp column is necessary
if (!missing(highlight) | !missing(annotatePval) | !missing(annotateN)) { showsnp=TRUE; } else { showsnp=FALSE; } # create an indicator showsnp to check whether snp column is needed
if (showsnp) { # snp column needed
if (missing(snp)) { snp="SNP"; } # if snp column is not specified by user then the column named SNP in data is selected
if (!(snp %in% colnames(m))) { stop(paste("Column", snp, "not found!")); } # if user doesn't specify snp column and there is no column called SNP in data then code gives error message
}
zerocount <- 5
# set color palettes; no gray in ggplot palettes
if (length(col)==1) {
if (col=="matlab") { col=c("#D95319","#E4A100","#7E2F8E","#5EA500","#0095D4","#A2142F","#0C53AA"); }
else if (col=="matlab2") { col=c("#A2142F","#0095D4","#7E2F8E","#5EA500","#D95319","#0C53AA","#E4A100"); }
else if (col=="Set1") { col=c("#e41a1c","#377eb8","#f0c001","#4daf4a","#984ea3","#ff7f00","#a65628","#f781bf"); }
else if (col=="Dark2") { col=c("#1b9e77","#d95f02","#7570b3","#e7298a","#66a61e","#e6ab02","#a6761d"); }
else if (col=="Set2") { col=c("#66c2a5","#fc8d62","#8da0cb","#e78ac3","#a6d854","#ffd92f","#d1ad79"); }
else if (col=="DarkSet2") { col=c("#1b9e77","#ffd92f","#d95f02","#a6d854","#7570b3","#d1ad79","#e7298a","#66c2a5","#e6ab02","#8da0cb","#a6761d","#fc8d62","#66a61e","#e78ac3"); }
else if (col=="reds") { col=c("#e31a1c","#fd8f71"); }
else if (col=="greens") { col=c("#33a02c","#97d52b"); }
else if (col=="blues") { col=c("#1f78b4","#40a5eb"); }
else if (col=="purples") { col=c("#984ea3","#f781dd"); }
else if (col=="greys") { col=c("gray35","gray60"); }
else if (col=="rgbs") { col=c("#e31a1c","#fd8f71","#33a02c","#97d52b","#1f78b4","#40a5eb"); }
else if (col=="all") { col=c("#D95319","#E4A100","#7E2F8E","#5EA500","#0095D4","#A2142F","#97d52b","#0C53AA","#e6ab02","#40a5eb","#d95f02","#7570b3","#e7298a","#66a61e","#fd8f71","#a6761d","#1b9e77"); }
else if (col=="rainbow1") { col=c("#9b001e","#e31a1c","#ff6726","#ff9249","#c58b00","#f0c001","#97d52b","#099100","#055600","#009476","#00abea","#000cff","#0c53aa","#7570b3","#7e2f8e","#de00ff","#f781bf","#e7298a"); }
else if (col=="rainbow2") { col=c("#e31a1c","#ff7f00","#f0c001","#5EA500","#40a5eb","#0C53AA","#984ea3"); }
else if (col=="rainbow3") { col=c("#e31a1c","#40a5eb","#7E2F8E","#ff7f00","#5EA500","#f0c001","#0C53AA"); }
else { warning("inappropriate col color vector/tag provided. resorting to default"); col=c("#D95319","#E4A100","#7E2F8E","#5EA500","#0095D4","#A2142F","#0C53AA"); } # use matlab as default
}
if (length(col2)==1) {
if (col2=="greys") { col2=c("gray35","gray60"); }
else if (col2=="matlab2") { col2=c("#A2142F","#0095D4","#7E2F8E","#5EA500","#D95319","#0C53AA","#E4A100"); }
else if (col2=="Set1") { col2=c("#e41a1c","#377eb8","#f0c001","#4daf4a","#984ea3","#ff7f00","#a65628","#f781bf"); }
else if (col2=="Dark2") { col2=c("#1b9e77","#d95f02","#7570b3","#e7298a","#66a61e","#e6ab02","#a6761d"); }
else if (col2=="Set2") { col2=c("#66c2a5","#fc8d62","#8da0cb","#e78ac3","#a6d854","#ffd92f","#d1ad79"); }
else if (col2=="DarkSet2") { col2=c("#1b9e77","#ffd92f","#d95f02","#a6d854","#7570b3","#d1ad79","#e7298a","#66c2a5","#e6ab02","#8da0cb","#a6761d","#fc8d62","#66a61e","#e78ac3"); }
else if (col2=="reds") { col2=c("#e31a1c","#fd8f71"); }
else if (col2=="greens") { col2=c("#33a02c","#97d52b"); }
else if (col2=="blues") { col2=c("#1f78b4","#40a5eb"); }
else if (col2=="purples") { col2=c("#984ea3","#f781dd"); }
else if (col2=="matlab") { col2=c("#D95319","#E4A100","#7E2F8E","#5EA500","#0095D4","#A2142F","#0C53AA"); }
else if (col2=="rgbs") { col2=c("#e31a1c","#fd8f71","#33a02c","#97d52b","#1f78b4","#40a5eb"); }
else if (col2=="all") { col2=c("#D95319","#E4A100","#7E2F8E","#5EA500","#0095D4","#A2142F","#97d52b","#0C53AA","#e6ab02","#40a5eb","#d95f02","#7570b3","#e7298a","#66a61e","#fd8f71","#a6761d","#1b9e77"); }
else if (col2=="rainbow1") { col2=c("#9b001e","#e31a1c","#ff6726","#ff9249","#c58b00","#f0c001","#97d52b","#099100","#055600","#009476","#00abea","#000cff","#0c53aa","#7570b3","#7e2f8e","#de00ff","#f781bf","#e7298a"); }
else if (col2=="rainbow2") { col2=c("#e31a1c","#ff7f00","#f0c001","#5EA500","#40a5eb","#0C53AA","#984ea3"); }
else if (col2=="rainbow3") { col2=c("#e31a1c","#40a5eb","#7E2F8E","#ff7f00","#5EA500","#f0c001","#0C53AA"); }
else { warning("inappropriate col2 color vector/tag provided. resorting to default"); col2=c("gray35","gray60"); } # use greys as default
}
# part 1: calculate logP, etc. --------------------------------------------------------------------------------------------------------------------------------------------
if (showsnp) { m=m[,c(chr,bp,p,snp,annotationCol)]; colnames(m)=c("CHR","BP","P","SNP","annotationCol"); } # snp column needed
else { m=m[,c(chr,bp,p)]; colnames(m)=c("CHR","BP","P"); } # only chr, bp and p columns are kept, rest are dropped, snp column is kept only if needed
f=complete.cases(m[,1:3]); m=m[f,]; # memory 81.3
# check if BP and P columns are numeric
if (!is.numeric(m$BP)) { stop("BP column has non-numeric values"); } # if BP column has non-numeric values then code gives error message
if (!is.numeric(m$P)) { stop("P column has non-numeric values"); } # if P column has non-numeric values then code gives error message
if (!missing(chrsubset)) { f=(m$CHR %in% chrsubset); m=m[f,,drop=F]; } # subset by given chrsubset value(s)
if (nrow(m)==0) { stop("Input data has no usable rows, cannot be plotted!"); } # check if data has actual usable rows
unc=unique(m$CHR); # list of unique chromosome numbers
if (sortchr) { unc=sort(unc); } # chromosome numbers might not be ordered in the file, so sort by chr first
numc=length(unc); # number of unique chromosomes
if (!missing(bprange)&numc>1) { stop("Only one chromosome can be used with BP range, otherwise it is ambiguous. Pass a single chromosome ID via chrsubset"); }
if (!missing(bprange)) { f=(m$BP>=bprange[1])&(m$BP<=bprange[2]); m=m[f,,drop=F]; } # subset by given BP range
if (nrow(m)==0) { stop("Input data has no usable rows, cannot be plotted!"); } # check if data has actual usable rows
# if single chromosome, show mb position instead of scaled position
if (numc==1) { bybp=TRUE; }
# if to be sorted by position, no need to sort by chr name/number
if (bybp) { sortchr=FALSE; }
if (sortchr) { f=order(m$CHR); m=m[f,]; } # chromosome numbers might not be ordered in the file, so sort by chr first # memory 44.0
# if (bybp) { f=order(m$BP); m=m[f,]; } # sort by bp. is not necessary for plotting, but for nice colors
m$BP=as.double(m$BP)/1E6; # convert to MB position
if (logp) { m$logP=-log10(m$P); } else { m$logP=m$P; } # if log transformation is required then log base 10 of p-values is calculated, else the given column is passed as logP
# if chrlabs provided, check length
if (numc>1&!missing(chrlabs)) { if (length(chrlabs)!=numc) { warning("number of chromosome labels provided do not match number of unique chromosomes. reverting to values in CHR column."); chrlabs=unc; } }
else { chrlabs=unc; }
# part 2: prepare plotting positions etc. --------------------------------------------------------------------------------------------------------------------------------------------
if (bybp) { # show mb position instead of scaled position. scale positions to make suitable for rounding. applies if single chromosome
fac1c=23/(max(m$BP)-min(m$BP)); m$BPn=m$BP*fac1c; # calculating the scaling factor
if (!missing(annotationWinMb)) { annotationWinMb=annotationWinMb*fac1c; } # scaling the mb window
}
else { # typical plot, combine positions across chromosomes
cmat=matrix(NA,nrow=numc,ncol=4); # a matrix cmat is created with number of rows as number of unique chr and 3 columns
for (i in 1:numc) {
ch=unc[i];
f=m$CHR==ch; # memory 126.9
w=m$BP[f]; # memory 50.6
cmat[i,1]=min(w);
cmat[i,2]=max(w);
cmat[i,3]=cmat[i,2]-cmat[i,1];
m$BP[f]=w-cmat[i,1]; # memory 233.3
rm(f);
cmat[i,4]=median(diff(sort(w))); }
cmat=as.data.frame(cmat);
colnames(cmat)=c("min","max","width","medgap"); # each row of cmat corresponds to each chr, 4 columns of cmat are calculated as min BP, max BP, width, and median gap
maxgap=max(cmat$medgap,na.rm=TRUE);
cmat$base=0*cmat$min; cmat$midp=0*cmat$min;
i=1; cmat$base[i]=0; cmat$midp[i]=cmat$width[i]/2; # base and midpoint are calculated for each chr with respect to width calculated previously
if (numc>1) {
for (i in 2:numc) {
cmat$base[i]=cmat$base[i-1]+cmat$width[i-1]+maxgap;
cmat$midp[i]=cmat$base[i]+(cmat$width[i]/2); } }
fac=numc/cmat$midp[numc]; # calculating the scaling factor
m$BP=fac*m$BP; cmat$basef=fac*cmat$base; cmat$midpf=fac*cmat$midp;
m$BPn=m$BP;
if (numc>1) {
for (i in 2:numc) {
ch=unc[i];
f=m$CHR==ch; # memory 93.4
w=m$BP[f]; # memory 146.4
m$BPn[f]=w+cmat$basef[i]; # memory 171.6
rm(f);} }
if (!missing(annotationWinMb)) { annotationWinMb=annotationWinMb*fac; } # scaling the mb window
}
m$C=m$CHR;
for (i in 1:numc) {
ch=unc[i];
f=m$CHR==ch; # memory 72.0
m$C[f]=i; # memory 113.8
rm(f); }
# part 2 B: annotations -----------------------------------
if (showsnp) { # pick selected snps, using either of 3 flags
m=m[,c("C","BPn","logP","SNP","annotationCol")];
if (!missing(highlight)) { # highlight listed snps
f=(m$SNP %in% highlight); msnp=m[f,,drop=F]; # msnp will contain the SNP information for annotation, in this case msnp contains information of only SNPs to be highlighted
if (annotateHighlight&annotateTop) { # annotate the top highlighted SNP per chromosome
m1=NULL; sunc=unique(msnp$C);
for (i in sunc) { m2=msnp[msnp$C==i,]; f=m2$logP==max(m2$logP); if (sum(m2$logP<0)>0) {f=m2$logP==max(m2$logP)|m2$logP==min(m2$logP)}; m1=rbind(m1,m2[f,]); } # loop over chr and identify the max logP value for every chr
msnp=m1; # SNP information of only max logP value per chr is stored in msnp
rm(m1,m2);
}
}
else if (!missing(annotatePval)) { # highlight snps with pvalue beyond a threshold; change based on logp=TRUE or not
f=(abs(m$logP)>=annotatePval); zerocount=sum(f); msnp=m[f,,drop=F]; # msnp will contain the SNP information for annotation, in this case msnp contains information of SNPs with p-value beyond threshold
}
else if (!missing(annotateN)) { # highlight top N snps
if (!missing(annotationWinMb)) { # within each chr, only one snp within annotationWinMb window. annotationWinMb value adjusted by factor above
m0=NULL; sunc=unique(msnp$C);
for (i in sunc) { # loop over chr
m2=msnp[msnp$C==i,]; f=order(m2$logP,decreasing=TRUE); m2=m2[f,]; m1=m2[1,,drop=F];
if (nrow(m2)>1) { for (j in 2:nrow(m2)) { f=abs(m1$BPn-m2$BPn[j]); if (min(f)>annotationWinMb) { m1=rbind(m1,m2[j,,drop=F]); } } }
m0=rbind(m0,m1); # identify top SNP within annotationWinMb window for each chr
}
msnp=m0; # store information of only the top SNP within annotationWinMb window for each chr
rm(m0,m1,m2);
k=nrow(msnp)-annotateN+1; k=sort(msnp$logP,partial=k)[k]; # sorting the p-value column to identify the k-th highest value
f=(msnp$logP>=k); msnp=msnp[f,,drop=F]; # msnp will contain the SNP information for annotation, in this case msnp contains information of top N SNPs
}
else { # only highlight top N snps
k=nrow(m)-annotateN+1; k=sort(m$logP,partial=k)[k]; # sorting the p-value column to identify the k-th highest value
f=(m$logP>=k); msnp=m[f,,drop=F]; # msnp will contain the SNP information for annotation, in this case msnp contains information of top N SNPs
}
}
}
if (!missing(annotationWinMb)) { annotateTop=FALSE; } # if annotating by window, then top snp flag not applicable
if (missing(highlight)&!missing(annotatePval)&zerocount>0) { # not using highlight list, but just annotatePval instead. check annotateTop and annotationWinMb options
if (annotateTop) { # if annotateTop = TRUE (default), only top snps in each chr is shown. else all top snps are shown
m1=NULL; sunc=unique(msnp$C);
for (i in sunc) { m2=msnp[msnp$C==i,]; f=m2$logP==max(m2$logP); if (sum(m2$logP<0)>0) {f=m2$logP==max(m2$logP)|m2$logP==min(m2$logP)}; m1=rbind(m1,m2[f,]); } # loop over chr and identify the max logP value for every chr
msnp=m1; # SNP information of only max logP value per chr is stored in msnp
rm(m1,m2);
}
else if (!missing(annotationWinMb)) { # within each chr, only one snp within annotationWinMb window. annotationWinMb value adjusted by factor above
m0=NULL; sunc=unique(msnp$C);
for (i in sunc) { # loop over chr
m2=msnp[msnp$C==i,]; f=order(m2$logP,decreasing=TRUE); m2=m2[f,]; m1=m2[1,,drop=F];
if (nrow(m2)>1) { for (j in 2:nrow(m2)) { f=abs(m1$BPn-m2$BPn[j]); if (min(f)>annotationWinMb) { m1=rbind(m1,m2[j,,drop=F]); } } }
m0=rbind(m0,m1); # identify top SNP within annotationWinMb window for each chr
}
msnp=m0; # store information of only the top SNP within annotationWinMb window for each chr
rm(m0,m1,m2);
}
}
if (!is.null(maxP)) { # if user has not specified that there should not be any maxP truncation
if (showsnp) { # if annotation is required
f=msnp$logP>=maxP; msnp$logP[f]=maxP; # logP values above maxP are truncated to maxP in msnp
f=msnp$logP<=-maxP; msnp$logP[f]=-maxP; # logP values below - maxP are truncated to -maxP in msnp
}
f=m$logP<=-maxP; m$logP[f]=-maxP; # logP values below -maxP are truncated to -maxP in m # memory 23.6
f=m$logP>=maxP; m$logP[f]=maxP; # logP values above maxP are truncated to maxP in m
}
else { # if user has specified that there should not be any maxP truncation
if (showsnp) { # if annotation is required
f=msnp$logP==Inf; msnp$logP[f]=sort(msnp$logP)[length(msnp$logP)-1]; # Inf values are truncated to highest finite value
f=msnp$logP==-Inf; msnp$logP[f]=sort(msnp$logP)[2]; # -Inf values are truncated to lowest finite value
}
f=m$logP==Inf; m$logP[f]=sort(m$logP)[length(m$logP)-1]; # Inf values are truncated to highest finite value
f=m$logP==-Inf; m$logP[f]=sort(m$logP)[2]; # -Inf values are truncated to lowest finite value
}
if (showsnp) { # if annotation is required
msnp$SNP[is.na(msnp$annotationCol)] <- NA; # if user has specified NA as colour of a SNP then the annotation text is marked as NA
}
# part 2 C: finally keep 3 columns only, as msnp with snp names is separate now -----------------------------------
m=m[,c("C","BPn","logP")];
# temporarily scale y axis to make more suitable for plotting
facy=10/(max(m$logP)-min(m$logP)); m$logP=m$logP*facy;
# part 3: reduce size for fast plotting --------------------------------------------------------------------------------------------------------------------------------------------
if (speedup) { # fast method: below 0.2% and above 99.8% round to 3 digits, rest round to 2 digits
quants=c(0,0.002,0.5,0.998,1); quants=quantile(m$logP,quants); # minimum, 0.2th percentile, median, 99.8th percentile and maximum are being calculated
right=(quants[5] - quants[4])/(quants[4] - quants[3]); # measure of significance of right tail
left=(quants[1] - quants[2])/(quants[2] - quants[3]); # measure of significance of left tail
if (nrow(m)<1E5) { #if there are lest than 100k rows then full data is rounded to 3 digits
digs=3; ms=m; ms$logP=round(ms$logP,digits=digs); ms$BPn=round(ms$BPn,digits=digs); f=duplicated(ms); ms=ms[!f,];
}
else { # round lower and upper parts separately
if (right>0.1) { # significant right tail
if (left>0.1) { # significant left tail
outer=m$logP>quants[4]|m$logP<quants[2]; inner=!outer;
digs=2;
ms=m[inner,];
ms$logP=round(ms$logP,digits=digs);
ms$BPn=round(ms$BPn,digits=digs);
f=duplicated(ms); ms=ms[!f,]; # inner part is rounded to 2 digits
digs=3;
m1=m[outer,];
m1$logP=round(m1$logP,digits=digs);
m1$BPn=round(m1$BPn,digits=digs);
f=duplicated(m1);
m1=m1[!f,];
ms=rbind(ms,m1); # outer part is rounded to 3 digits
}
else { # insignificant left tail
outer=m$logP>quants[4]; inner=!outer;
digs=2;
ms=m[inner,]; # memory 67.2
ms$logP=round(ms$logP,digits=digs);
ms$BPn=round(ms$BPn,digits=digs); # memory 9.3
f=duplicated(ms);
ms=ms[!f,]; # inner part is rounded to 2 digits # memory 24.4
digs=3;
m1=m[outer,]; # memory 23.8
m1$logP=round(m1$logP,digits=digs);
m1$BPn=round(m1$BPn,digits=digs);
f=duplicated(m1); m1=m1[!f,];
ms=rbind(ms,m1); # outer part is rounded to 3 digits # memory 19.2
}
}
else { # insignificant right tail
if (left>0.1) { # significant left tail
outer=m$logP<quants[2];
inner=!outer;
digs=2;
ms=m[inner,];
ms$logP=round(ms$logP,digits=digs);
ms$BPn=round(ms$BPn,digits=digs);
f=duplicated(ms);
ms=ms[!f,]; # inner part is rounded to 2 digits
digs=3;
m1=m[outer,];
m1$logP=round(m1$logP,digits=digs);
m1$BPn=round(m1$BPn,digits=digs);
f=duplicated(m1);
m1=m1[!f,];
ms=rbind(ms,m1); # outer part is rounded to 3 digits
}
else { # insignificant left tail
digs=3;
ms=m;
ms$logP=round(ms$logP,digits=digs);
ms$BPn=round(ms$BPn,digits=digs);
f=duplicated(ms); ms=ms[!f,]; # as there is no significant tail full data is rounded to 3 digits
}
}
rm(m1);
}
}
else # full data without any reduction
{ ms=m; }
# print(paste(nrow(m),"snps reduced to",nrow(ms)));
# remove variables to reduce memory hog before plotting
rm(m);
# if showing mb position instead of scaled position, rescale positions after rounding (e.g. if single chromosome)
if (bybp) {
ms$BPn=ms$BPn/fac1c;
if (showsnp) { msnp$BPn=msnp$BPn/fac1c; }
}
ms$logP=ms$logP/facy;
# part 4: plot --------------------------------------------------------------------------------------------------------------------------------------------
palette(col); # setting the colour palette
ybnd=c(floor(min(c(min(ms$logP),0))),ceiling(max(ms$logP))); # setting the y axis boundaries
# set x axis boundaries, depending on whether single chromosome or not
fac=0.015*(max(ms$BPn)-min(ms$BPn));
if (numc==1) { xbnd=c(min(ms$BPn)-fac,max(ms$BPn)+fac); } else { xbnd=c(-fac,max(ms$BPn)+fac); }
if (showsnp&zerocount>0) { # if annotation is required
parmai=par("mai"); # measuring the margin size in inches
plotx=par("pin")[1]; ploty=par("pin")[2]; # storing the plot width in inches within variable plotx and plot height in inches within variable ploty
xwidth=xbnd[2]-xbnd[1]; ywidth=ybnd[2]-ybnd[1]; # calculating the inherent width and height of the plot
m2=msnp;
strgap=strheight("A",units="inches",cex=cex.text);
m2$strwidth=strwidth(m2$SNP,units="inches",cex=cex.text)+strheight(m2$SNP,units="inches",cex=cex.text); # measuring the dimensions of the SNP texts in inches
m2$width=m2$strwidth*cospi(annotationAngle/180); # measuring the x component of the SNP text in inches
m2$height=m2$strwidth*sinpi(annotationAngle/180); # measuring the y component of the SNP text in inches
m2$xmax=m2$BPn+fac/7+m2$width*xwidth/plotx; # measuring the maximum width required for SNP texts in plot X units
m2$ymax=m2$logP+fac/7+m2$height*ywidth/ploty; # measuring the maximum height required for SNP text in plot Y units
extrax=max( (max(m2$xmax)-xbnd[2])*plotx/xwidth - 0.8*parmai[4] , 0); # measuring the extra width required from margin in inches
extray=max( (max(m2$ymax)-ybnd[2])*ploty/ywidth - 0.8*parmai[3] , 0); # measuring the extra height required from margin in inches
parmai[3]=parmai[3]+extray; parmai[4]=parmai[4]+extrax; # adding the extra width and height
par(mai=parmai); # setting the final margin
}
if (!missing(xlim)) { xbnd=xlim; }
if (!missing(ylim)) { ybnd=ylim; }
xlbl="Chromosome";
if (numc==1) { xlbl=paste(xlbl,unc[1],"(Mb)",sep=" "); } # if single chromosome, add chromosome number to X label
if (bybp&numc>1) { xlbl="Position (Mb)"; }
if (logp) { ylbl=expression(-log[10](italic(p))); } else { ylbl=p; } # adapt Y label to whether log transformed or not
if (!missing(xlab)) { xlbl=xlab; }
if (!missing(ylab)) { ylbl=ylab; }
par(xpd = NA)
if (colAbovePval) { # Colour all hits above p-value threshold, while points below are col2
if (bybp) { # show mb position instead of scaled position
palette(col2); # setting colour palette to col2 for hits below threshold
plot(ms$BPn,ms$logP,pch=20,col=ms$C,cex=cex,cex.axis=cex.axis,las=1,xaxt="n",bty="n",xaxs="i",yaxs="i",xlim=xbnd,ylim=ybnd,xlab=xlbl,ylab=ylbl,...);
axis(side=1,pos=0,las=1,cex.axis=cex.axis,col=NA,col.ticks="black");
f=abs(ms$logP)>=annotatePval; ms=ms[f,]; # selecting hits above threshold
palette(col); # setting colour palette to user defined / default for hits above threshold
points(ms$BPn,ms$logP,col=ms$C,pch=20,cex=cex); # plotting hits above threshold
}
else { # typical plot
palette(col2) # setting colour palette to col2 for hits below threshold
plot(ms$BPn,ms$logP,pch=20,col=ms$C,cex=cex,cex.axis=cex.axis,las=1,xaxt="n",bty="n",xaxs="i",yaxs="i",xlim=xbnd,ylim=ybnd,xlab=xlbl,ylab=ylbl,...);
axis(side=1,at=cmat$midpf,labels=chrlabs,pos=0,las=1,cex.axis=cex.axis,col=NA,col.ticks="black");
f=abs(ms$logP)>=annotatePval; ms=ms[f,]; # selecting hits above threshold
palette(col); # setting colour palette to user defined / default for hits above threshold
points(ms$BPn,ms$logP,col=ms$C,pch=20,cex=cex); # plotting hits above threshold
}
}
else { # one colour scheme for the entire plot
if (bybp) { # show mb position instead of scaled position
plot(ms$BPn,ms$logP,pch=20,col=ms$C,cex=cex,cex.axis=cex.axis,las=1,xaxt="n",bty="n",xaxs="i",yaxs="i",xlim=xbnd,ylim=ybnd,xlab=xlbl,ylab=ylbl,...);
axis(side=1,pos=0,las=1,cex.axis=cex.axis,col=NA,col.ticks="black");
}
else { # typical plot
plot(ms$BPn,ms$logP,pch=20,col=ms$C,cex=cex,cex.axis=cex.axis,las=1,xaxt="n",bty="n",xaxs="i",yaxs="i",xlim=xbnd,ylim=ybnd,xlab=xlbl,ylab=ylbl,...);
axis(side=1,at=cmat$midpf,labels=chrlabs,pos=0,las=1,cex.axis=cex.axis,col=NA,col.ticks="black");
}
}
if (is.numeric(baseline)) { abline(h=baseline,col="black", xpd=FALSE); } # plotting baseline
if (is.numeric(suggestiveline)) { abline(h=suggestiveline,col="blue", xpd=FALSE); if (length(suggestiveline)==1&sum(ms$logP<0)>0) { abline(h=-suggestiveline,col="blue", xpd=FALSE) }; } # plotting suggestiveline
if (is.numeric(genomewideline)) { abline(h=genomewideline,col="red", xpd=FALSE); if (length(genomewideline)==1&sum(ms$logP<0)>0) { abline(h=-genomewideline,col="red", xpd=FALSE) }; } # plotting genomewideline
# part 5: plot highlights / annotations if necessary --------------------------------------------------------------------------------------------------------------------------------------------
adj=c(0,0);
if (annotationAngle==0) { adj=c(0,0.5); }
if (!missing(highlight)) { # show highlighted snps
points(msnp$BPn,msnp$logP,col=msnp$annotationCol,pch=20,cex=cex); # plotting highlighted points
if (annotateHighlight) { # annotate some/all of listed snps
if (!missing(annotatePval)) { f=msnp$logP>=annotatePval; if (zerocount>0) { m2=msnp[f,]; text(x=m2$BPn+fac/7,y=m2$logP+fac/7,labels=m2$SNP,adj=adj,srt=annotationAngle,col=m2$annotationCol,cex=cex.text,...); }; f=msnp$logP<=-annotatePval; if (zerocount>0) { m2=msnp[f,]; text(x=m2$BPn+fac/7,y=m2$logP+fac/7,labels=m2$SNP,adj=adj,srt=-annotationAngle,col=m2$annotationCol,cex=cex.text,...); } } # only snps above threshold
else { m2=msnp; text(x=m2$BPn+fac/7,y=m2$logP+fac/7,labels=m2$SNP,adj=adj,srt=annotationAngle,col=m2$annotationCol,cex=cex.text,...); } # all snps
}
}
if (!missing(annotateN)) { # not using highlight list, but annotatePval or annotateN instead
m2=msnp; text(x=m2$BPn+fac/7,y=m2$logP+fac/7,labels=m2$SNP,adj=adj,srt=annotationAngle,col=m2$annotationCol,cex=cex.text,...); # annotating the selected snps
}
if (!missing(annotatePval)&(zerocount>0)) { # not using highlight list, but annotatePval or annotateN instead
f=msnp$logP>0; m2=msnp[f,]; text(x=m2$BPn+fac/7,y=m2$logP+fac/7,labels=m2$SNP,adj=adj,srt=annotationAngle,col=m2$annotationCol,cex=cex.text,...); # annotating the selected snps on the positive side
if (sum(msnp$logP<0)>0) { f=msnp$logP<0; m2=msnp[f,]; text(x=m2$BPn+fac/7,y=m2$logP+fac/7,labels=m2$SNP,adj=adj,srt=-annotationAngle,col=m2$annotationCol,cex=cex.text,...); } # annotating the selected snps on the negative side
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.