Nothing
fitTetra <-
function (
marker, data, diplo=NA,
select=TRUE, diploselect=TRUE, # by default all samples of tetraploids and diploids selected
maxiter=40, maxn.bin=200, nbin=200,
sd.threshold=0.1, p.threshold=0.99,
call.threshold=0.6, peak.threshold=0.85,
try.HW=TRUE, dip.filter=1,
sd.target=NA,
plot="none", plot.type="png", plot.dir=NA) {
if (class(data)!="data.frame" ||
length(which(names(data)=="MarkerName"))!=1 ||
length(which(names(data)=="SampleName"))!=1 ||
length(which(names(data)=="ratio"))!=1) {
stop("data is not a valid data frame")
}
# dip.filter: change to integer if needed
# (for compatibility with earlier versions where it was logical)
dip.filter <- as.integer(dip.filter)
if (dip.filter<0 || dip.filter>2) dip.filter <- 1
# arrange plotting output:
plot <- tolower(plot)
if (plot!="none") {
if (substr(plot.type,1,1)=="_")
plot.type <- substr(plot.type,2,5) #already checked by saveMarkerModels
else plot.type <- check.plottype(plot.type)
if (plot.type=="none") plot<-"none"
}
plotall <- plot=="all"
plotfitted <- plotall || plot=="fitted"
if (is.na(plot.dir)) plot.dir <- ""
else {
plot.dir <- gsub("(^ +)|( +$)", "", plot.dir) #strip leading and trailing blanks
if (plot.dir != "") {
plot.dir <- paste(plot.dir,"/",sep="")
}
}
# get all markernames, samplenames and diplonames:
markernames <- levels(as.factor(as.character(data$MarkerName)))
markername <- markernames[marker]
mrknr <- padded(marker,length(markernames)) #adds leading zeroes if needed
samplenames <- levels(as.factor(as.character(data$SampleName)))
if (class(diplo)=="data.frame") #not NA
diplonames <- levels(as.factor(as.character(diplo$SampleName)))
# start log
log <- ""
log <- append(log,paste(marker,"\tname=",markername,sep=""))
resultnames <- getResultnames(ng=5)
rejected <- FALSE
# first: select the diploid data (if any) to plot into the tetraploid histogram:
drr <- numeric(0) #for later test on length>0
if (class(diplo)=="data.frame") {
if (class(diplo$ratio)!="numeric") stop("diplo$ratio is not numeric")
if (length(diploselect)!=nrow(diplo)) {
diploselect <- rep(diploselect, times=ceiling(nrow(diplo)/length(diploselect)))
if (length(diploselect)>nrow(diplo)) diploselect <- diploselect[1:nrow(diplo)]
}
drr <- diplo$ratio[(diplo$MarkerName==markername) & diploselect & !is.na(diplo$ratio)]
#drr contains only non-NA ratios and excludes any for which diploselect is FALSE
}
#now the tetraploids:
if (class(data$ratio)!="numeric") stop("data$ratio is not numeric")
if (length(select)!=nrow(data)) {
select <- rep(select, times=ceiling(nrow(data)/length(select)))
if (length(select)>nrow(data)) select <- select[1:nrow(data)]
}
nsamp<-length(samplenames)
rr <- data$ratio[(data$MarkerName==markername) & select & !is.na(data$ratio)] #selection for minimum intensity
#rr contains only non-NA ratios and excludes any for which select is FALSE
# calculate the total number of selected samples, including the ones with ratio NA even if they are not in data:
missingsamp <- nsamp - length(data$ratio[data$MarkerName==markername])
countselected <- sum(select[data$MarkerName==markername], na.rm=T)
nselsamp <- countselected + missingsamp; rm(missingsamp,countselected)
if (length(rr)<50) { # in CodomMarker at least 10*ng observations required
rejected <- TRUE
log <- append(log,paste(marker,"","","discarded: too few data",sep="\t"))
mrkresult <- NA
}
else {
if (length(rr)<nselsamp*call.threshold) {
rejected <- TRUE
log <- append(log,paste(marker,"","","discarded: too many NA ratios",sep="\t"))
mrkresult <- NA
}
else { #test if no samples occur more than once for this marker
allsamp <- as.factor(data$SampleName[data$MarkerName==markername])
tabsamp <- tabulate(allsamp)
if (max(tabsamp)>1) {
rejected <- TRUE
log <- append(log,paste(marker,"","","discarded: some samples occur more than once",sep="\t"))
mrkresult <- NA
}
if (class(diplo)=="data.frame") {
allsamp <- diplo$SampleName[diplo$MarkerName==markername]
tabsamp <- tabulate(allsamp)
if (max(tabsamp)>1) {
rejected <- TRUE
log <- append(log,paste(marker,"","","discarded: some diploid samples occur more than once",sep="\t"))
mrkresult <- NA
}
}
}
}
if (!rejected) {
# sufficient observations and no double samples
names(rr) <- data$SampleName[(data$MarkerName==markername) & select & !is.na(data$ratio)] #tetraploid sample names, should be unique within marker
if (class(diplo)=="data.frame") {
names(drr) <- diplo$SampleName[(diplo$MarkerName==markername) & diploselect & !is.na(diplo$ratio)] #diploid sample names, should be unique within marker
}
# try several models with 5 peaks
ng <- 5 #ng = number of genotypic classes
nmodel <- 32 #8 models with 2, 3 or 4 start configurations
mrkresult <- list()
mrkresult$stats<-data.frame(rep(NA,nmodel))
for (i in 2:length(resultnames)) mrkresult$stats[[i]] <- rep(NA,nmodel)
names(mrkresult$stats) <- resultnames
mrkresult$stats$m <- 1:nmodel
mrkresult$probs <- list()
# 8 models with clustering:
plot.fname <- paste(plot.dir,"plots ",mrknr," A ",markername,sep="")
if (plotall) start.plot(plot.type,plot.fname, ncol=2, nrow=4)
# use kmeans with fixed random seed for reproducible results
# but don't affect the random or non-random number sequence for
# the calling program:
if (!exists('.Random.seed')) runif(1) #.Random.seed only generated in first call of random number generator
savedseed <- .Random.seed; set.seed(3)
# initialise the cluster means once
clusinit <- ClusterInit(asin(sqrt(rr)), ng=ng) # returns clus.mu and clus.sd on transformed scale
clus.mu <- sin(clusinit$clus.mu)^2 # transform clus.mu back to 0-1 scale, but keep clus.sd on transformed scale
for (model in 1:8) if (try.HW || getPtype(model)!="p.HW") {
mrkresult<-MarkerResult(marker,markername,rr,model=model,mutype=getMutype(model),ptype=getPtype(model),
maxiter=maxiter,maxn.bin=maxn.bin,nbin=nbin, mustart=clus.mu, sdstart=clusinit$clus.sd,
mrkresult=mrkresult,nsamp=nselsamp,plothist=plotall)
if (plotall && is.na(mrkresult$stats$npar[model]))
errorplot(paste(marker,markername,getModelName(getMutype(model),getPtype(model))))
}
.Random.seed <- savedseed
if (plotall) save.plot(plot.type,plot.fname)
#same models without clustering:
plot.fname <- paste(plot.dir,"plots ",mrknr," B ",markername,sep="")
if (plotall) start.plot(plot.type,plot.fname, ncol=2, nrow=4)
for (model in 9:16) if (try.HW || getPtype(model)!="p.HW") {
mrkresult<-MarkerResult(marker,markername,rr,model=model,mutype=getMutype(model),ptype=getPtype(model),clus=F,
maxiter=maxiter,maxn.bin=maxn.bin,nbin=nbin,mrkresult=mrkresult,nsamp=nselsamp,plothist=plotall)
if (plotall && is.na(mrkresult$stats$npar[model]))
errorplot(paste(marker,markername,getModelName(getMutype(model),getPtype(model))))
}
if (plotall) save.plot(plot.type,plot.fname)
modelsfitted <- 16
mrkresult$stats <- calcSelcrit(mrkresult$stats,rep(T,nrow(mrkresult$stats)),sd.target)
optmodel1 <- which.min(mrkresult$stats$selcrit)
if (length(optmodel1)==0 || is.na(optmodel1)) {
rejected <- TRUE
log <- append(log,paste(marker,"optmodel1=NA","","discarded: optmodel1=NA",sep="\t"))
} else {
#optmodel1!=NA
#First we check for two situations indication a possibly wrong fit:
# - the nulli or quadruplex peak is very small while the simplex or triplex is big, or
# - there is a small peak at the simplex or triplex while the flanking peaks are bigger
#In these cases we run all the models again with a new mustart
opt.ptype <- getPtype(optmodel1)
opt.mutype <- getMutype(optmodel1)
logline <- paste(marker,"\toptmodel1=",optmodel1,"\t",getModelName(opt.mutype,opt.ptype),sep="")
#Check for a small nulli or quadruplex next to a large simplex or triplex:
#Note: in F1 sim+duplex or du+triplex are also possible, but nulli+sim
#and tri+quadri are frequently shifted towards the middle
P0 <- which(names(mrkresult$stats)=="Pact0")
Pactpeak <- unlist(subset(mrkresult$stats, subset=1:nrow(mrkresult$stats)==optmodel1)[P0:(P0+4)])
#Pactpeak[1:5] are now the fractions of selected samples in nulliplex-tetraplex peaks
max.nulli <- 0.025; min.sim<-0.15 #thresholds for small extreme peak detection
#false positive in F1 at 1:8:18:8:1 (1 of 25 seg.types)
#and in HW with major allele freq 0.58-0.61
if ( (Pactpeak[1]<max.nulli && Pactpeak[2]>min.sim) ||
(Pactpeak[5]<max.nulli && Pactpeak[4]>min.sim) ) {
loside <- Pactpeak[1]<max.nulli && Pactpeak[2]>min.sim #at low side of range
if (loside) {
if (Pactpeak[5]<max.nulli && Pactpeak[4]>min.sim) {
#situation occurs at both sides, which is most extreme?
loside <- (Pactpeak[2]-Pactpeak[1]) > (Pactpeak[4]-Pactpeak[5])
}
}
#set the nwmu according to low side or high side:
nwmu <- numeric(5)
mu0 <- which(names(mrkresult$stats)=="muact0")
muactpeak <- unlist(subset(mrkresult$stats, subset=1:nrow(mrkresult$stats)==optmodel1)[mu0:(mu0+4)]) #the current mu's
if (loside) {
nwmu[1:4] <- muactpeak[2:5]
nwmu[5] <- (nwmu[4]+pi/2)/2
} else {
nwmu[2:5] <- muactpeak[1:4]
nwmu[1] <- nwmu[2]/2
}
#fit all models with the new mustart:
nwmu <- sin(nwmu)*sin(nwmu) #to untransformed ratios
plot.fname <- paste(plot.dir,"plots ",mrknr," C ",markername,sep="")
if (plotall) start.plot(plot.type,plot.fname, ncol=2, nrow=4)
for (model in 17:24) if (try.HW || getPtype(model)!="p.HW") {
mrkresult<-MarkerResult(marker,markername,rr,model=model,mutype=getMutype(model),ptype=getPtype(model),clus=F,
maxiter=maxiter,maxn.bin=maxn.bin,nbin=nbin,mustart=nwmu,
mrkresult=mrkresult,nsamp=nselsamp,plothist=plotall)
if (plotall && is.na(mrkresult$stats$npar[model]))
errorplot(paste(marker,markername,getModelName(getMutype(model),getPtype(model))))
}
if (plotall) save.plot(plot.type,plot.fname)
modelsfitted <- 24
} # small nulli or quadruplex peak next to big peak
else {
#check for dip at simplex or triplex peak:
if (mrkresult$stats$dip[optmodel1]) {
# (note: indices from [1] to [5], not [0] to [4] !)
p<-rep(0,5)
p[1]<-mrkresult$stats$P0[optmodel1]
p[2]<-mrkresult$stats$P1[optmodel1]
p[3]<-mrkresult$stats$P2[optmodel1]
p[4]<-mrkresult$stats$P3[optmodel1]
p[5]<-mrkresult$stats$P4[optmodel1]
mubk<-rep(0,5)
mubk[1]<-mrkresult$stats$mu0[optmodel1]
mubk[2]<-mrkresult$stats$mu1[optmodel1]
mubk[3]<-mrkresult$stats$mu2[optmodel1]
mubk[4]<-mrkresult$stats$mu3[optmodel1]
mubk[5]<-mrkresult$stats$mu4[optmodel1]
gsamp<-hist(mrkresult$probs[[optmodel1]]$maxgeno,breaks=c(-0.5,0.5,1.5,2.5,3.5,4.5),plot=F)$counts #based on maxgeno: all samples in peaks
toler<-1.02 #doesn't harm to do this too often, we still should obtain the correct model
#test both valleys and select widest (we do not test for valley at duplex peak)
valley<-numeric(0)
if ( (gsamp[2]<(toler*gsamp[1]) && gsamp[2]<(toler*max(gsamp[3:5])) && gsamp[1]>3) ||
(opt.ptype=="p.free" && p[2]<(toler*p[1]) && p[2]<(toler*max(p[3:5])) && p[1]>0.01) ) {
valley <- 1
}
if ( (gsamp[4]<(toler*gsamp[5]) && gsamp[4]<(toler*max(gsamp[1:3])) && gsamp[5]>3 ) ||
(opt.ptype=="p.free" && p[4]<(toler*p[5]) && p[4]<(toler*max(p[1:3])) && p[5]>0.01) ) {
valley[length(valley)+1] <- 3
}
if (length(valley)==2) {
#select widest valley based on the mu's on the transformed scale
if ( (mrkresult$stats$mu4[optmodel1]-mrkresult$stats$mu2[optmodel1]) >
(mrkresult$stats$mu2[optmodel1]-mrkresult$stats$mu0[optmodel1]) ) {
valley <- 3
}
else valley <- 1
logline <- paste(logline,"\tdips at peak 1&3, widest at",valley,sep="")
}
else if (length(valley)==1)
logline <- paste(logline,"\tdip at peak",valley,sep="")
if (length(valley)==1) {
#calculate the new mustart:
if (valley==1) {
mubk[2:4]<-mubk[3:5]
mubk[5]<-(mubk[5]+1)/2
}
else { #valley==3
mubk[2:4]<-mubk[1:3]
mubk[1]<-mubk[1]/2
}
# fit all models with the new mustart:
plot.fname <- paste(plot.dir,"plots ",mrknr," C ",markername,sep="")
if (plotall) start.plot(plot.type,plot.fname, ncol=2, nrow=4)
for (model in 17:24) if (try.HW || getPtype(model)!="p.HW") {
mrkresult<-MarkerResult(marker,markername,rr,model=model,mutype=getMutype(model),ptype=getPtype(model),clus=F,
maxiter=maxiter,maxn.bin=maxn.bin,nbin=nbin,mustart=mubk,mrkresult=mrkresult,nsamp=nselsamp,plothist=plotall)
if (plotall && is.na(mrkresult$stats$npar[model]))
errorplot(paste(marker,markername,getModelName(getMutype(model),getPtype(model))))
}
if (plotall) save.plot(plot.type,plot.fname)
modelsfitted <- 24
#check if the best model still has a dip:
mrkresult$stats <- calcSelcrit(mrkresult$stats,rep(T,nrow(mrkresult$stats)),sd.target) #all rows
optmodel2 <- which.min(mrkresult$stats$selcrit)
if (mrkresult$stats$dip[optmodel2]) {
#optimal model still has dip
#we again take the mu's and the valley of optmodel1 but now shift in opposite direction:
mubk[1]<-mrkresult$stats$mu0[optmodel1]
mubk[2]<-mrkresult$stats$mu1[optmodel1]
mubk[3]<-mrkresult$stats$mu2[optmodel1]
mubk[4]<-mrkresult$stats$mu3[optmodel1]
mubk[5]<-mrkresult$stats$mu4[optmodel1]
#calculate the new mustart:
if (valley==1) {
mubk[2]<-mubk[1]
mubk[1]<-mubk[1]/2
}
else { #valley==3
mubk[4]<-mubk[5]
mubk[5]<-(mubk[5]+1)/2
}
# fit all models with the new mustart:
plot.fname <- paste(plot.dir,"plots ",mrknr," D ",markername,sep="")
if (plotall) start.plot(plot.type,plot.fname, ncol=2, nrow=4)
for (model in 25:32) if (try.HW || getPtype(model)!="p.HW") {
mrkresult<-MarkerResult(marker,markername,rr,model=model,mutype=getMutype(model),ptype=getPtype(model),clus=F,
maxiter=maxiter,maxn.bin=maxn.bin,nbin=nbin,mustart=mubk,mrkresult=mrkresult,nsamp=nselsamp,plothist=plotall)
if (plotall && is.na(mrkresult$stats$npar[model]))
errorplot(paste(marker,markername,getModelName(getMutype(model),getPtype(model))))
}
if (plotall) save.plot(plot.type,plot.fname)
modelsfitted <- 32
} # if dip[optmodel2]
} #if length(valley)==1
} # if dip[optmodel1]
} # 2 big peaks else
log <- append(log, logline)
#now select the best model (optmodel2) taking dip into account:
if (dip.filter>0) {
dipok<-!(is.na(mrkresult$stats$dip) | mrkresult$stats$dip) #only rows with model without dip
mrkresult$stats <- calcSelcrit(mrkresult$stats,dipok,sd.target)
optmodel2 <- which.min(mrkresult$stats$selcrit[1:modelsfitted])
if (length(optmodel2)==0 || is.na(optmodel2)) {
mrkresult$stats <- calcSelcrit(mrkresult$stats,rep(T,nrow(mrkresult$stats)),sd.target) #all rows
optmodel2 <- which.min(mrkresult$stats$selcrit[1:modelsfitted])
}
}
else {
mrkresult$stats <- calcSelcrit(mrkresult$stats,rep(T,nrow(mrkresult$stats)),sd.target) #all rows
optmodel2 <- which.min(mrkresult$stats$selcrit[1:modelsfitted])
}
if (length(optmodel2)==0 || is.na(optmodel2)) {
# discard this marker
rejected <- TRUE
log <- append(log,paste(marker,"optmodel2=NA","","marker discarded: optmodel2=NA",sep="\t"))
} else {
# optmodel2 found
opt.ptype <- getPtype(optmodel2)
opt.mutype <- getMutype(optmodel2)
logline <- paste(marker,"\toptmodel2=",optmodel2,"\t",getModelName(opt.mutype,opt.ptype),sep="")
if (mrkresult$stats$sdtrans0[optmodel2]>sd.threshold) {
rejected <- TRUE
log <- append(log,paste(logline,"discarded: sd>sd.threshold",sep="\t"))
mrkresult$stats$message[optmodel2] <- "rejected: sd>sd.threshold"
}
else if (mrkresult$stats$dip[optmodel2] && dip.filter==2) {
rejected <- TRUE
mrkresult$stats$message[optmodel2] <- "rejected: no models without dip"
log <- append(log,paste(logline, "discarded: no models without dip"))
}
else {
# geno<-maxgeno for samples above p.threshold, else NA:
mrkresult$probs[[optmodel2]]$geno<-rep(NA,length(rr))
sel.maxP <- mrkresult$probs[[optmodel2]]$maxP
sel.maxP <- sel.maxP>=p.threshold #without the temporary fix, not needed any more
mrkresult$probs[[optmodel2]]$geno[sel.maxP] <- mrkresult$probs[[optmodel2]]$maxgeno[sel.maxP]
if (length(mrkresult$probs[[optmodel2]]$geno[!is.na(mrkresult$probs[[optmodel2]]$geno)])<call.threshold*nselsamp) {
# note that we compare to the samples with select=T, not to the total number of samples
rejected <- TRUE
log <- append(log,paste(logline,"discarded: less than minimum number of samples called",sep="\t"))
mrkresult$stats$message[optmodel2] <- "rejected: less than minimum number of samples called"
} else {
gsamp<-hist(mrkresult$probs[[optmodel2]]$geno,breaks=c(-0.5,0.5,1.5,2.5,3.5,4.5),plot=F)$counts #based on called samples
if (max(gsamp)/sum(gsamp)>peak.threshold) {
rejected <- TRUE
log <- append(log,paste(logline, "discarded: more than maximum fraction of samples in one peak",sep="\t"))
mrkresult$stats$message[optmodel2] <- "rejected: more than maximum fraction of samples in one peak"
} else {
#optmodel2 accepted
if (mrkresult$stats$dip[optmodel2] && dip.filter==1) {
#if dip.filter==2 marker was already rejected; if 0 there might be a model without dip but worse selcrit
logline <- paste(logline, "no models without dip")
}
log <- append(log,logline)
probs <- makeprobs(marker,markername,samplenames,getModelName(opt.mutype,opt.ptype),select,
data$MarkerName==markername,rr,mrkresult$probs[[optmodel2]],data)
if (plotfitted) {
#plot the fitted model with the assigned genotypes:
modeldata <- list()
modeldata$psi <- list()
nfirst <- which(names(mrkresult$stats)=="mutrans0")
modeldata$psi$mu <- as.numeric(mrkresult$stats[optmodel2,nfirst:(nfirst+ng-1)])
modeldata$psi$sigma <- as.numeric(mrkresult$stats[optmodel2,(nfirst+ng):(nfirst+2*ng-1)])
modeldata$psi$p <- as.numeric(mrkresult$stats[optmodel2,(nfirst+2*ng):(nfirst+3*ng-1)])
plotfitted.fname <- paste(plot.dir,mrknr," ",markernames[marker],sep="") #final plot, fitted
start.plot(plot.type,plotfitted.fname)
layout(matrix(c(1,2)), heights=c(2,1)) #2 plots, upper 3x as high as lower
#upper plot: the fitted model
par(mar=c(0,4,3,1))
htet <- PlotHistDensity(rr, modeldata, xlim=c(0.001,0.999), trafo="asr",
maintitle=paste(marker,markername),
subtitle="", xlabel="", xaxis="n",
nbreaks=40, frequ=T)
if (length(drr)>0) {
#plot diploids histogram into tetraploids plot:
hdip <- hist(drr,breaks=htet$breaks,plot=F)
hdip$counts[hdip$counts>htet$ylim[2]]<-htet$ylim[2]
nbreaks <- length(htet$breaks)-1
binwidth <- 1/nbreaks
barwidth <- 1/3 #relative to binwidth
par(new=T)
barplot (hdip$counts,
width=binwidth*barwidth,
space=1/barwidth-1,
xlim=c(binwidth*(1-barwidth)/2,1+binwidth*(1-barwidth)/2),
ylim=htet$ylim,col="gray",main="",sub="",xlab="",xaxt="n")
}
#lower plot: the assigned genotypes
mrkresult$probs[[optmodel2]]$geno[is.na(mrkresult$probs[[optmodel2]]$geno)] <- -2 #plot well separated from the valid scores 0:4
par(mar=c(5,4,0,1))
sampcol = rep("blue",length(rr))
sampcol[mrkresult$probs[[optmodel2]]$geno < 0] = "red"
plot(0:1~0:1, type="n",
ylab="geno",ylim=c(-3,5),xlab="signal ratio",xlim=c(0.001,0.999) )
lines(c(-2,-2)~c(par("usr")[1]+0.004,par("usr")[2]-0.004),col="lightgrey",lty=3)
lines(c(0,0) ~ c(par("usr")[1]+0.004,par("usr")[2]-0.004),col="lightgrey",lty=3)
lines(c(1,1) ~ c(par("usr")[1]+0.004,par("usr")[2]-0.004),col="lightgrey",lty=3)
lines(c(2,2) ~ c(par("usr")[1]+0.004,par("usr")[2]-0.004),col="lightgrey",lty=3)
lines(c(3,3) ~ c(par("usr")[1]+0.004,par("usr")[2]-0.004),col="lightgrey",lty=3)
lines(c(4,4) ~ c(par("usr")[1]+0.004,par("usr")[2]-0.004),col="lightgrey",lty=3)
points(mrkresult$probs[[optmodel2]]$geno~rr,
pch="|",cex=0.5,col=sampcol)
save.plot(plot.type,plotfitted.fname)
} # if plotfitted
} # optmodel2 accepted (not too many samples in one peak)
} # not too few sample genotypes called
} # optmodel2 not rejected for large sigma / BIC>0
} #optmodel2!=NA
} #optmodel1!=NA
} # if sufficient observations
if (rejected) {
if (plotfitted) {
plotfitted.fname <- paste(plot.dir,"rejected_",mrknr," ",markernames[marker],sep="") #final plot, unfitted
start.plot(plot.type,plotfitted.fname)
layout(matrix(c(1,1)))
ymax <- 40
nbreaks <- 40
par(mar=c(5,4,3,1))
if (length(rr[!is.na(rr)])>0) {
nbars <- min(ceiling(nbreaks/(max(rr)-min(rr))),100,ceiling(length(rr)/2))
if (nbars<nbreaks) nbars<-nbreaks
htet <- hist(rr,breaks=seq(0,1,by=1/nbars),plot=FALSE)
if (ymax<1.1*max(htet$counts)) ymax<-1.1*max(htet$counts)
} else {
# hist fails without data, so we create a custom histogram:
htet <- list(breaks=seq(0,1,by=1/nbreaks),counts=rep(0,nbreaks))
class(htet) <- "histogram"
}
#plot the tetraploids histogram:
barplot (htet$counts,
width=htet$breaks[2]-htet$breaks[1],
space=0,xlim=c(0.001,0.999),ylim=c(0,ymax), main=paste(marker,markername,"(no fit)"),
col="white", xlab="signal ratio",ylab="Frequency")
axis(1,labels=TRUE, at=seq(0,1, by=0.2))
if (length(drr)>0) {
#plot diploids histogram into tetraploids plot:
hdip <- hist(drr,breaks=htet$breaks,plot=F)
hdip$counts[hdip$counts>ymax]<-ymax
binwidth <- 1/length(hdip$counts)
barwidth <- 1/3 #relative to binwidth
par(new=T)
barplot (hdip$counts,
width=binwidth*barwidth,
space=1/barwidth-1,
xlim=c(binwidth*(1-barwidth)/2,1+binwidth*(1-barwidth)/2),
ylim=c(0,ymax),
col="gray",main="",sub="",xlab="",xaxt="n")
}
save.plot(plot.type,paste("rejected_",plotfitted.fname,sep=""))
} #plotfitted
} #rejected
# finally prepare the output list:
result <- list(log=log)
if (length(mrkresult)==1) { #only if no fitting attempted because too few samples
result$allmodeldata <- NA
result$scores <- NA
result$diploscores <- NA
result$modeldata <- data.frame(
marker=marker,
markername=markername,
m=NA,
model="none",
nsamp=nselsamp,
nsel=length(rr))
for (i in 7:length(resultnames)) result$modeldata[[i]] <- NA
names(result$modeldata) <- resultnames
result$modeldata$message <- "rejected: too few observations selected"
}
else {
result$allmodeldata <- subset(mrkresult$stats,subset=c(rep(T,modelsfitted),rep(F,32-modelsfitted)))
# Published version: modeldata has only some columns from allmodeldata.
# New version 6-MAR-2012: modeldata has all columns from allmodeldata
if (rejected) {
if (!exists("optmodel2") || length(optmodel2)!=1 || is.na(optmodel2)) {
result$modeldata <- subset(mrkresult$stats,subset= (1:nrow(mrkresult$stats))==1)
result$modeldata$m <- NA #no model selected
result$modeldata$model <- NA #no model selected
if (!exists("optmodel2") || length(optmodel2)!=1 || is.na(optmodel2)) {
result$modeldata$message <- "optmodel1==NA"
} else {
result$modeldata$message <- "optmodel2==NA"
}
} else {
result$modeldata <- subset(mrkresult$stats,subset= (1:nrow(mrkresult$stats))==optmodel2)
}
for (i in 7:(length(resultnames)-1)) result$modeldata[[i]] <- NA #leave columns up to nsel and message
result$scores <- NA
result$diploscores <- NA
}
else { #not rejected
result$modeldata <- subset(mrkresult$stats,subset= (1:nrow(mrkresult$stats))==optmodel2)
result$scores <- probs
if (class(diplo)=="data.frame") { #not NA
# if diplo exists, also export scores for all samples in diplo:
psi <- list(mu=c(result$modeldata$mutrans0,result$modeldata$mutrans1,result$modeldata$mutrans2,result$modeldata$mutrans3,result$modeldata$mutrans4),
sigma=c(result$modeldata$sdtrans0,result$modeldata$sdtrans1,result$modeldata$sdtrans2,result$modeldata$sdtrans3,result$modeldata$sdtrans4),
p=c(result$modeldata$P0,result$modeldata$P1,result$modeldata$P2,result$modeldata$P3,result$modeldata$P4))
diploprobs <- EMGaussExp.vectorized(asin(sqrt(drr)), psi) #matrix, per diplo sample ng numbers: probs that sample belongs to each peak
#calculate maxgeno, maxP, geno:
maxPna <- apply(diploprobs,1,max) #find the maxP of each row of p-values (= of each sample)
maxP <- maxPna[!is.na(maxPna)] # omit missing values
probs <- data.frame(diploprobs)
rownames(probs) <- names(drr) #note: does not include samples rejected because select=F
names(probs) <- c("P0","P1","P2","P3","P4")
#list for each sample the maximum p value and the maxgeno corresponding to it:
probs$maxgeno<-max.col(diploprobs)-1 # for every row (sample) the max peak (0..4 instead of 1..5)
probs$maxP<-maxPna # for every row the P at the max
#add extra columns and add rows for samples not in drr:
result$diploscores <- makeprobs(marker,markername,diplonames,getModelName(opt.mutype,opt.ptype),diploselect,diplo$MarkerName==markername,drr,probs,diplo)
result$diploscores$geno<-rep(NA,nrow(result$diploscores))
sel.maxP <- which(result$diploscores$maxP>=p.threshold)
result$diploscores$geno[sel.maxP] <- result$diploscores$maxgeno[sel.maxP]
}
else result$diploscores <- NA
}
}
result
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.