#####################################################################
#
# mqmscan.R
#
# Copyright (c) 2009-2012, Danny Arends
#
# Modified by Pjotr Prins and Karl Broman
#
#
# first written Februari 2009
# last modified Aug 2012
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# version 3, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but without any warranty; without even the implied warranty of
# merchantability or fitness for a particular purpose. See the GNU
# General Public License, version 3, for more details.
#
# A copy of the GNU General Public License, version 3, is available
# at http://www.r-project.org/Licenses/GPL-3
#
# Part of the R/qtl package
# Contains: mqmscan
#
#
#####################################################################
######################################################################
#
# mqmscan: main scanning function to the mqmpackage
#
######################################################################
mqmscan <- function(cross,cofactors=NULL,pheno.col=1,model=c("additive","dominance"),forceML=FALSE,
cofactor.significance=0.02,em.iter=1000,window.size=25.0,step.size=5.0,logtransform = FALSE,
estimate.map = FALSE,plot=FALSE,verbose=FALSE, outputmarkers=TRUE, multicore=TRUE, batchsize=10, n.clusters=1,
test.normality=FALSE,off.end=0){
start <- proc.time()
model <- match.arg(model)
#Because iirc we cannot pass booleans from R to C
if(forceML){
forceML <- 1 #1 -> Maximum Likelyhood
}else{
forceML <- 0 #0 -> Restricted Maximum Likelyhood
}
dominance <- 0 #We code :0 -> Additive model (no_dominance)
if(model=="dominance"){
dominance <- 1 #and 1 -> Dominance model
}
if(estimate.map){
estimate.map <- 1
}else{
estimate.map <- 0
}
n.run <- 0
if(is.null(cross)){
stop("No cross file. Please supply a valid cross object.")
}
if(class(cross)[1] == "f2" || class(cross)[1] == "bc" || class(cross)[1] == "riself"){
if(class(cross)[1] == "f2"){
ctype = 1
}
if(class(cross)[1] == "bc" || class(cross)[1]=="dh"){
ctype = 2
}
if(class(cross)[1] == "riself") {
ctype = 3
# check genotypes
g <- as.numeric(pull.geno(cross))
g <- sort(unique(g[!is.na(g)]))
if(max(g)==2) { # convert genotypes from 1/2 to 1/3
for(i in seq(along=cross$geno))
cross$geno[[i]]$data[!is.na(cross$geno[[i]]$data) & cross$geno[[i]]$data==2] <- 3
}
}
n.ind <- nind(cross)
n.chr <- nchr(cross)
if(verbose) {
cat("INFO: Received a valid cross file type:",class(cross)[1],".\n")
cat("INFO: Number of individuals: ",n.ind,"\n")
cat("INFO: Number of chromosomes: ",n.chr,"\n")
}
savecross <- cross
geno <- NULL
chr <- NULL
dist <- NULL
newcmbase <- NULL
out.qtl <- NULL
for(i in 1:n.chr) {
#We always shift the marker positions to 0
geno <- cbind(geno,cross$geno[[i]]$data)
chr <- c(chr,rep(i,dim(cross$geno[[i]]$data)[2]))
newcmbase = c(newcmbase,min(cross$geno[[i]]$map))
cross$geno[[i]]$map <- cross$geno[[i]]$map-(min(cross$geno[[i]]$map))
dist <- c(dist,cross$geno[[i]]$map)
}
if(cofactor.significance <=0 || cofactor.significance >= 1){
stop("cofactor.significance must be between 0 and 1.\n")
}
if(any(is.na(geno))){
stop("Missing genotype information, please estimate unknown data, before running mqmscan.\n")
}
if(missing(cofactors)) cofactors <- rep(0,sum(nmar(cross)))
numcofold <- sum(cofactors)
cofactors <- checkdistances(cross,cofactors,1)
numcofnew <- sum(cofactors)
if(numcofold!=numcofnew){
cat("INFO: Removed ",numcofold-numcofnew," cofactors that were close to eachother\n")
}
#Convert phenotypes from character to numeric
pheno.col = stringPhenoToInt(cross,pheno.col)
if (length(pheno.col) > 1){
cross$pheno <- cross$pheno[,pheno.col] #Scale down the triats
result <- mqmscanall( cross,cofactors=cofactors,forceML=forceML,model=model,
cofactor.significance=cofactor.significance,step.size=step.size,window.size=window.size,
logtransform=logtransform, estimate.map = estimate.map,plot=plot, verbose=verbose,n.clusters=n.clusters,batchsize=batchsize)
return(result)
}
if(pheno.col != 1){
if(verbose) {
cat("INFO: Selected phenotype ",pheno.col,".\n")
cat("INFO: Number of phenotypes in object ",nphe(cross),".\n")
}
if(nphe(cross) < pheno.col || pheno.col < 1){
stop("No such phenotype in cross object.\n")
}
}
if(test.normality && !mqmtestnormal(cross, pheno.col, 0.01, FALSE)){
warning("Trait might not be normal (Shapiro normality test)\n")
}
pheno <- cross$pheno[[pheno.col]]
phenovar = var(pheno,na.rm = TRUE)
if(phenovar > 1000){
if(!logtransform){
if(verbose) cat("INFO: Before LOG transformation Mean:",mean(pheno,na.rm = TRUE),"variation:",var(pheno,na.rm = TRUE),".\n")
#warning(paste("WARNING: Set needs Log-transformation? (var=",phenovar,"), use mqmscan with logtransform=TRUE"))
}
}
if(logtransform){
#transform the cross file
cross <- transformPheno(cross,pheno.col,transf=log)
pheno <- cross$pheno[[pheno.col]]
}
n.mark <- ncol(geno)
if(verbose) cat("INFO: Number of markers:",n.mark,"\n")
#check for missing phenotypes
dropped <- NULL
droppedIND <- NULL
for(i in 1:length(pheno)) {
if(is.na(pheno[i]) || is.infinite(pheno[i])){
if(verbose) cat("INFO: Dropped individual ",i," with missing phenotype.\n")
dropped <- c(dropped,i)
if(!is.null(cross$mqm)){
droppedIND <- c(droppedIND,cross$mqm$augIND[i])
}
n.ind = n.ind-1
}
}
#Throw out missing phenotypes from phenotype vector and genotype matrix
if(!is.null(dropped)){
geno <- geno[-dropped,]
pheno <- pheno[-dropped]
}
#CHECK for previously augmented dataset
if(!is.null(cross$mqm)){
augmentedNind <- cross$mqm$Nind
augmentedInd <- cross$mqm$augIND
if(verbose){
cat("n.ind:",n.ind,"\n")
cat("augmentedNind:",augmentedNind,"\n")
cat("length(augmentedInd):",length(augmentedInd),"\n")
}
if(!is.null(dropped)){
augmentedInd <- cross$mqm$augIND[-dropped]
augmentedInd[1] <- 0
for(x in 1:(length(augmentedInd)-1)){
if(augmentedInd[x+1] - 1 > augmentedInd[x] ){
for(y in (x+1):length(augmentedInd)){
augmentedInd[y] <- augmentedInd[y] - ((augmentedInd[x+1] - augmentedInd[x])-1)
}
}
}
}
augmentedNind <- length(unique(augmentedInd))
if(verbose){
cat("New augmentedNind:",augmentedNind,"\n")
cat("New length(augmentedInd):",length(augmentedInd),"\n")
}
}else{
#No augmentation
augmentedNind <- n.ind
augmentedInd <- 0:n.ind
}
#CHECK if we have cofactors, so we can do backward elimination
backward <- 0;
if(missing(cofactors)){
if(verbose) cat("INFO: No cofactors, setting cofactors to 0\n")
cofactors = rep(0,n.mark)
}else{
if(length(cofactors) != n.mark){
if(verbose) cat("ERROR: # Cofactors != # Markers\n")
}else{
if(verbose) cat("INFO:",sum(cofactors!=0),"Cofactors received to be analyzed\n")
if((sum(cofactors) > n.ind-10 && dominance==0)){
stop("INFO: Cofactors don't look okay for use without dominance\n")
}
if((sum(cofactors)*2 > n.ind-10 && dominance==1)){
stop("INFO: Cofactors don't look okay for use with dominance\n")
}
if(sum(cofactors) > 0){
if(verbose) cat("INFO: Doing backward elimination of selected cofactors.\n")
backward <- 1;
n.run <- 0;
}else{
backward <- 0;
cofactors = rep(0,n.mark)
}
}
}
step.min = -off.end;
step.max = max(dist)+step.size+off.end;
step.max <- as.integer(ceiling((step.max+step.size)/step.size)*step.size)
if((step.min+step.size) > step.max){
stop("step.max needs to be >= step.min + step.size")
}
# cat("Step.min:",step.min," Step.max:",step.max,"\n")
if(step.size < 1){
stop("step.size needs to be >= 1")
}
qtlAchromo <- length(seq(step.min,step.max,step.size))
if(verbose) cat("INFO: Number of locations per chromosome: ",qtlAchromo, "\n")
end.1 <- proc.time()
result <- .C("R_mqmscan",
as.integer(n.ind),
as.integer(n.mark),
as.integer(1), # 1 phenotype
as.integer(geno),
as.integer(chr),
DIST=as.double(dist),
as.double(pheno),
COF=as.integer(cofactors),
as.integer(backward),
as.integer(forceML),
as.double(cofactor.significance),
as.integer(em.iter),
as.double(window.size),
as.double(step.size),
as.double(step.min),
as.double(step.max),
as.integer(n.run),
as.integer(augmentedNind),
as.integer(augmentedInd),
QTL=as.double(rep(0,2*n.chr*qtlAchromo)),
as.integer(estimate.map),
as.integer(ctype),
as.integer(dominance),
as.integer(verbose),
PACKAGE="qtl")
end.2 <- proc.time()
# initialize output object
qtl <- NULL
info <- NULL
names <- NULL
for(i in 1:(n.chr*qtlAchromo)) {
#Store the result in the qtl object
qtl <- rbind(qtl,c(ceiling(i/qtlAchromo),rep(seq(step.min,step.max,step.size),n.chr)[i],result$QTL[i]))
info <- rbind(info,result$QTL[(n.chr*qtlAchromo)+i])
#make names in the form: cX.locXX
names <- c(names,paste("c",ceiling(i/qtlAchromo),".loc",rep(seq(step.min,step.max,step.size),n.chr)[i],sep=""))
}
if(estimate.map){
new.map <- pull.map(cross)
chrmarkers <- nmar(cross)
sum <- 1
for(i in 1:length(chrmarkers)) {
for(j in 1:chrmarkers[[i]]) {
new.map[[i]][j] <- result$DIST[sum]
sum <- sum+1
}
}
}
if(plot){
if(estimate.map && backward){
op <- par(mfrow = c(3,1))
}else{
if(estimate.map || backward){
op <- par(mfrow = c(2,1))
}else{
op <- par(mfrow = c(1,1))
}
}
if(estimate.map){
if(verbose) cat("INFO: Viewing the user supplied map versus genetic map used during analysis.\n")
plotMap(pull.map(cross), new.map,main="Supplied map versus re-estimated map")
}
}
if(backward){
if(!estimate.map){
new.map <- pull.map(cross)
}
chrmarkers <- nmar(cross)
mapnames <- NULL
for(x in 1:nchr(cross)){
mapnames <- c(mapnames,names(pull.map(cross)[[x]]))
}
sum <- 1
model.present <- 0
qc <- NULL
qp <- NULL
qn <- NULL
for(i in 1:length(chrmarkers)) {
for(j in 1:chrmarkers[[i]]) {
#cat("INFO ",sum," ResultCOF:",result$COF[sum],"\n")
if(result$COF[sum] != 48){
if(verbose) cat("MODEL: Marker",sum,"named:", strsplit(names(unlist(new.map)),".",fixed=TRUE)[[sum]][2],"from model found, CHR=",i,",POSITION=",as.double(unlist(new.map)[sum])," cM\n")
qc <- c(qc, as.character(names(cross$geno)[i]))
qp <- c(qp, as.double(unlist(new.map)[sum]))
qn <- c(qn, mapnames[sum])
model.present <- 1
}
sum <- sum+1
}
}
if(!is.null(qc) && model.present){
why <- sim.geno(savecross,n.draws=1)
QTLmodel <- makeqtl(why, qc, qp, qn, what="draws")
attr(QTLmodel,"mqm") <- 1
if(plot) plot(QTLmodel)
}
}
rownames(qtl) <- names
qtl <- cbind(qtl,1/(min(info))*(info-min(info)))
qtl <- cbind(qtl,1/(min(info))*(info-min(info))*qtl[,3])
colnames(qtl) = c("chr","pos (cM)",paste("LOD",colnames(cross$pheno)[pheno.col]),"info","LOD*info")
#Convert to data/frame and scanone object so we can use the standard plotting routines
qtl <- as.data.frame(qtl, stringsAsFactors=TRUE)
if(backward && !is.null(qc) && model.present){
attr(qtl,"mqmmodel") <- QTLmodel
cimcovar <- as.data.frame(cbind(as.numeric(attr(qtl,"mqmmodel")[[4]]),as.data.frame(attr(qtl,"mqmmodel")[[5]])), stringsAsFactors=TRUE)
rownames(cimcovar) <- attr(qtl,"mqmmodel")[[2]]
colnames(cimcovar) <- c("chr","pos")
attr(qtl, "marker.covar.pos") <- cimcovar
}
class(qtl) <- c("scanone",class(qtl))
if(outputmarkers){
#Remove pseudomarkers from the dataset and scale to the chromosome
#Somewhat longer then off-end to be able to put back the original markers
for( x in 1:nchr(cross)){
to.remove <- NULL
chr.length <- max(cross$geno[[x]]$map)
markers.on.chr <- which(qtl[,1]==x)
to.remove <- markers.on.chr[which(qtl[markers.on.chr,2] > chr.length+off.end+(2*step.size))]
to.remove <- c(to.remove,markers.on.chr[which(qtl[markers.on.chr,2] < -off.end)])
if(length(to.remove) > 0) qtl <- qtl[-to.remove,]
}
qtl <- addmarkerstointervalmap(cross,qtl)
qtl <- as.data.frame(qtl, stringsAsFactors=TRUE)
if(backward && !is.null(qc) && model.present){
attr(qtl,"mqmmodel") <- QTLmodel
cimcovar <- as.data.frame(cbind(as.numeric(attr(qtl,"mqmmodel")[[4]]),as.data.frame(attr(qtl,"mqmmodel")[[5]])), stringsAsFactors=TRUE)
rownames(cimcovar) <- attr(qtl,"mqmmodel")[[2]]
colnames(cimcovar) <- c("chr","pos")
attr(qtl, "marker.covar.pos") <- cimcovar
}
class(qtl) <- c("scanone",class(qtl))
}
#Now we handle the off-end and any shifts comming from that (newcmbase)
#We didn't thow away the old names, and could thus get duplicates
qtlnew <- qtl
rownames(qtlnew) <- NULL
oldnames <- rownames(qtl)
for(x in 1:n.chr){
#Do per chromosome
markers.on.chr <- which(qtl[,1]==x)
if(newcmbase[x] !=0 && nrow(qtl[markers.on.chr,]) > 0){
qtl[markers.on.chr,2] <- qtl[markers.on.chr,2]+newcmbase[x]
#We end up with 3 posibilities
for(n in 1:nrow(qtl[markers.on.chr,])){
name <- rownames(qtl[markers.on.chr,])[n]
id <- which(name==oldnames)
if(!is.na(name) && grepl(".loc",name,fixed=TRUE)){
#a marker we need to shift
rownames(qtlnew)[id] <- paste(strsplit(name,".loc",fixed=TRUE)[[1]][1],".loc",qtlnew[id,2],sep="")
}else{
#a marker with a user defined name, no need to shift
rownames(qtlnew)[id] <- name
}
}
}else{
#No shift for the chromosome so we don't have to worry about shifting
rownames(qtlnew)[markers.on.chr] <- rownames(qtl)[markers.on.chr]
}
}
qtl <- qtlnew
for( x in 1:nchr(cross)){
#Remove the off ends that we left hanging a step before
if(class(cross$geno[[x]])!="X"){
to.remove <- NULL
chr.length <- max(cross$geno[[x]]$map)
markers.on.chr <- which(qtl[,1]==x)
to.remove <- markers.on.chr[which(qtl[markers.on.chr,2] > chr.length+off.end)]
to.remove <- c(to.remove,markers.on.chr[which(qtl[markers.on.chr,2] < -off.end)])
qtl <- qtl[-to.remove,]
}
}
qtl[,1] <- factor(names(cross$geno)[qtl[,1]], levels=names(cross$geno))
#Plot the results if the user asked for it
if(plot){
info.c <- qtl
#Check for errors in the information content IF err we can't do a second plot
e <- 0
for(i in 1:ncol(qtl)){
if(is.na(info.c[i,5])){
e<- 1
}
if(is.infinite(info.c[i,5])){
e<- 1
}
if(is.null(info.c[i,5])){
e<- 1
}
}
#No error do plot 2
if(!e){
mqmplot.singletrait(qtl,main=paste(colnames(cross$pheno)[pheno.col],"at significance=",cofactor.significance))
}else{
plot(qtl,main=paste(colnames(cross$pheno)[pheno.col],"at significance=",cofactor.significance),lwd=1)
grid(max(qtl$chr),5)
labels <- paste("QTL",colnames(cross$pheno)[pheno.col])
legend("topright", labels,col=c("black"),lty=c(1))
}
op <- par(mfrow = c(1,1))
}
end.3 <- proc.time()
if(verbose) cat("INFO: Calculation time (R->C,C,C-R): (",round((end.1-start)[3], digits=3), ",",round((end.2-end.1)[3], digits=3),",",round((end.3-end.2)[3], digits=3),") (in seconds)\n")
qtl
}else{
stop("Currently only F2, BC, and selfed RIL crosses can be analyzed by MQM.")
}
}
# end of mqmscan.R
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.