#Load some libraries library(unpakathonJan2016) library(dplyr) library(ggplot2) library(reshape) library(GGally)
Here is the distribution of phenotypes across non-farm experiments. The numbers correspond to the number of lines assessed for that phenotype not the number of plants
with(phenolong %>% filter(meta.experiment != "farm") %>% select(accession,experiment,variable) %>% distinct(), table(variable,experiment))
Let's check the distribution of values for each phenotype
ggplot(phenolong,aes(value)) + geom_histogram() + facet_wrap(~variable,scales = "free",ncol=4)
Several weird values for several phenotypes
Now look at problematic data (extreme values)
Here are the treatments and experiments with leaf.numbers greater than 50
phenolong %>% filter(variable=="leaf.number",value>50)%>%select(experiment,treatment)%>%distinct()
Looks ok, these are all from cofc-second (big pots and fert), we'll keep em.
I'm going to flag points that are outside the central 90% of all values for a phenotype as suspect. We can then switch this off and on and look at the outlier effect in analyses below
phenoquant <- phenolong %>% group_by(variable) %>% summarise(quant05 = quantile(value,0.05,na.rm=T),quant95 = quantile(value,0.95,na.rm=T)) phenolong.quant = left_join(phenolong,phenoquant) phenolong.quant$inrange <- with(phenolong.quant,ifelse((quant95>=value)&(quant05<=value),TRUE,FALSE)) phenolong = phenolong.quant
For example, look at the ggplot from above now
ggplot(phenolong%>%filter(inrange),aes(value)) + geom_histogram() + facet_wrap(~variable,scales = "free",ncol=4)
exp1long <- phenolong %>% filter(meta.experiment=="1",inrange) %>% group_by(variable,accession) %>% summarise(value=mean(value,na.rm=T)) exp1.col = exp1long %>% filter(accession%in%c("CS70000","CS60000")) exp1.phyt = exp1long %>% filter(grepl("CS",accession)) phenos=unique(exp1long$variable) tmp=sapply(phenos,function(ph){ tmpphen = exp1long[exp1long$variable==ph,] if (ph=="fruitnum") {tmpphen = tmpphen[tmpphen$value<400,]} hist(tmpphen$value,main=paste("exp1",ph)) tmpphyt=exp1.phyt %>% filter(variable==ph) tmpcol=exp1.col %>% filter(variable==ph) points(y=rep(0,dim(tmpphyt)[1]),x=tmpphyt$value, col="blue",pch=16, cex=1.1) #phytometers points(y=rep(0,dim(tmpcol)[1]),x=tmpcol$value, col="red",pch=16, cex=1.1) # columbia especially })
#phenos=c(unique(exp1long$variable)) phenos<- c("aborted.fruits","avg.fruit.length","basal.branch","days.to.bolt","diameter.at.bolt","fruitnum","germinants.14d","inflorescence.height", "ttl.maininfl.branch") exp1long <- exp1long %>% filter(variable %in% phenos) exp1wide <- cast(exp1long) paircompare <- exp1wide[,-1] paircompare=paircompare[complete.cases(paircompare),] ggpairs(paircompare,lower=list(continuous="smooth"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.