In ebimodeling/sugarbag: R version of Sugarbag database of sugarcane traits and yields

title: "Overall Effect of N" author: "Ursula Ruiz-Vera, Deepak Jaiswal, David LeBauer" date: "Fri Nov 07 16:04:51 2014" output: html_document: default pdf_document: keep_tex: yes number_sections: yes word_document: default

#setwd("C:\\Users\\ruizver1\\Documents\\myprojectEBI\\sugarbag")
library(sugarbag)
library(data.table)
library(ggplot2)
setwd("./")
knitr::opts_chunk$set(warning=FALSE, cache=TRUE)

Prepare data

# Step 1:- Get all the experiment IDs  for whic Harvest, Experiment Design and Planting information is available

expID <-GetExpID_for_N_Combined()
# Step 2 :- Define variable name that we are interested in

varname <- c("DWMST","DWTOTA","LAI",  "NWTOTA", "SWMST","SWTOTA", "Latitude.x", "Longitude.x")

## Create dataframe
id <- expID[1]
Output <- Update_expID(id, varname)
N = length(expID)

# Get N versus Yield data from all the experiments andRBIND them to Output
for (i in 2:N){
  id <- expID[i]
  tmp_output <- Update_expID(id, varname)
  Output <- rbind(Output,tmp_output)
}

Calculate the plot means and then the maximum values of the variables

library(dplyr)

OUT <- data.table(Output)

OUT2 <- OUT[,list(DWMST=mean(DWMST, na.rm = TRUE),DWTOTA=mean(DWTOTA, na.rm = TRUE),LAI=mean(LAI, na.rm = TRUE), 
                  NWTOTA=mean(NWTOTA, na.rm = TRUE),SWMST=mean(SWMST, na.rm = TRUE),SWTOTA=mean(SWTOTA, na.rm = TRUE)), 
            by = c('ExpID', 'Treatment', 'Date', 'Ncombined', 'Cultivar', 'Irrigation')]


ndata <- unique(OUT2[,list(DWMST=max(DWMST, na.rm = TRUE),DWTOTA=max(DWTOTA, na.rm = TRUE), 
                  NWTOTA=max(NWTOTA, na.rm = TRUE),SWMST=max(SWMST, na.rm = TRUE),SWTOTA=max(SWTOTA, na.rm = TRUE)),
            by = c('ExpID','Treatment','Ncombined','Cultivar','Irrigation')])

#from here, I am selecting the dates when I got the maximum values and chosing those days from the firts file OUT to work with the raw data, not the means
ndata2 <- merge(OUT2, ndata, by = c('ExpID', 'Treatment', 'Ncombined', 'Cultivar', 'Irrigation'))

ndata3 <- filter(ndata2, SWMST.x-SWMST.y==0, ExpID!= "13", ExpID!="36", ExpID!= "14", ExpID!="7")

ndata4 <- select(ndata3, ExpID, Treatment, Ncombined, Cultivar, Irrigation, Date)

ndata5 <- merge(OUT,ndata4, by = c('ExpID', 'Treatment', 'Date', 'Ncombined', 'Cultivar', 'Irrigation'))

#d is the document with the data I want
d <- ndata5[!is.na(Ncombined), 
           list(ExpID, Treatment, Ncombined = as.numeric(Ncombined), Cultivar, Irrigation, DWTOTA, SWTOTA, NWTOTA, DWMST, SWMST, 
          yield = DWTOTA, sugar = SWTOTA, cane_yield = DWMST, cane_sugar = SWMST/DWMST, nitrogen_concentration = NWTOTA/DWTOTA)]

## Clean -Inf http://stackoverflow.com/a/12188551/199217
invisible(lapply(names(d),function(.name) set(d, which(is.infinite(d[[.name]])), j = .name,value =NA)))

Meta-analysis of N effects on yield, Sugar, and LAI

Exploratory Data Analysis

Our question is: Is sugarcane yield and sugar content limited by N and/or water availablility? Our Model: y = Nitrogen * Irrigation + epsilon where y is yield or sugar content (of total aboveground biomass, of cane)

Summarize

library(pander)
#Records where Cane Sugar Content are too low, it is because in some data DWMST is NA and it is giving cane_sugar=0
pander(d[cane_sugar < 0.1 & cane_sugar > 0])
d[cane_sugar < 0.1,`:=` (cane_sugar = NA)]

pander(data.frame(n = nrow(d)))
pander(summary(d[, list(Ncombined, yield, sugar, cane_yield, cane_sugar, nitrogen_concentration)], digits = 3))
pander(summary(lm(yield ~ Ncombined*Irrigation, data = d), digits = 3))
pander(summary(lm(cane_yield ~ Ncombined*Irrigation, data = d), digits = 3))
pander(summary(lm(cane_sugar ~ Ncombined*Irrigation, data = d), digits = 3))

Check distribution, outliers, odd variables

#dev.off helps me not to have problems running the ggplot
dev.off()
ggplot(data = d) + geom_histogram(aes(yield))

ggplot(data = d) + geom_histogram(aes(sugar))

ggplot(data = d) + geom_histogram(aes(cane_yield))

ggplot(data = d) + geom_histogram(aes(cane_sugar))

Look which ExpID had irrigation and non-irrigation

data(variables)
if(exists("variables"))pander(variables[variables$VariableName %in% colnames(ndata),])

ggplot(data = d) + geom_point(aes(Ncombined, yield, color = as.logical(Irrigation))) + 
  facet_wrap(~ExpID) + ylim(range(0, max(pretty(ndata$DWTOTA))))

#I won't use the variable sugar because there is few data
ggplot(data = d) + geom_point(aes(Ncombined, sugar, color = as.logical(Irrigation))) + 
  facet_wrap(~ExpID) + ylim(range(0, 4000))

ggplot(data = d) + geom_point(aes(Ncombined, cane_yield, color = as.logical(Irrigation))) + 
  facet_wrap(~ExpID) + ylim(range(0, 8000))

ggplot(data = d) + geom_point(aes(Ncombined, cane_sugar, color = as.logical(Irrigation))) + 
  facet_wrap(~ExpID) + ylim(range(0, 0.6))

ggplot(data = d) + geom_point(aes(Ncombined, nitrogen_concentration, color = as.logical(Irrigation))) + 
  facet_wrap(~ExpID) + ylim(range(0, 0.008))

##graphic with linear regression (by Ursula)

#yield
ggplot(data = d, aes(Ncombined, yield)) + 
geom_point(aes(Ncombined, yield ), alpha = 0.4) + 
stat_smooth(method=lm, level=0.9) +
ylim(0,9000) + 
xlab("amount of N (Kg/ha)") + ylab("Dry Weight of Above-ground Biomass (g/m2)")

ggplot(data = d, aes(Ncombined, log(yield))) + 
  geom_point(aes(Ncombined, log(yield) ), alpha = 0.4) + 
  stat_smooth(method=lm, level=0.9) +
  ylim(6.5,9.5) + 
  xlab("amount of N (Kg/ha)") + ylab("Log of the dry Weight of Above-ground Biomass (g/m2)") 

#cane yield
ggplot(data = d, aes(Ncombined, cane_yield)) + 
  geom_point(aes(Ncombined, cane_yield), alpha = 0.4) + 
  stat_smooth(method=lm, level=0.9) +
  ylim(0,7000) +
  xlab("amount of N (Kg/ha)") + ylab("Dry weight of the Cane Biomass (g/m2)")

ggplot(data = d, aes(Ncombined, log(cane_yield) )) + 
  geom_point(aes(Ncombined, log(cane_yield )), alpha = 0.4) + 
  stat_smooth(method=lm, level=0.9) +
  ylim(6.5,9.5) +
    xlab("amount of N (Kg/ha)") + ylab("Dry weight of the Cane Biomass (g/m2)")

#sugar in cane
ggplot(data = d, aes(Ncombined, cane_sugar)) + 
  geom_point(aes(Ncombined, cane_sugar), alpha = 0.4) + 
  stat_smooth(method=lm, level=0.9) + ylim(0.35,0.6) + 
  xlab("amount of N (Kg/ha)") + 
  ylab("Sucrose/Dry Weight, of the Cane Biomass")

#nitrogen in biomass
ggplot(data = d, aes(Ncombined,nitrogen_concentration) ) + 
  geom_point(aes(Ncombined, nitrogen_concentration), alpha = 0.4) + 
  stat_smooth(method=lm, level=0.9) + ylim(0,0.006) + 
  xlab("amount of N (Kg/ha)") + ylab("Nitrogen/Dry Weight, of the Above-ground Biomass")
####

Check basic models (testing normality with shapiro test and transforminng some variables, linear regression analysis (modified by Ursula))

#test
par(mfrow=c(2,2))

library(nlme)
install.packages("lme4")

hist(d$Ncombined)

shapiro.test(d$yield)
shapiro.test(log10(d$yield))
shapiro.test(log(d$yield))
shapiro.test(sqrt(d$yield))

d[,plot(Ncombined, log10(yield))]
plot(lm(log10(yield) ~ Ncombined, data = d), main = "Yield")

shapiro.test(d$cane_yield)
shapiro.test(log10(d$cane_yield))
shapiro.test(log(d$cane_yield))
shapiro.test(sqrt(d$cane_yield))
hist(d$cane_yield)
hist(log10(d$cane_yield))
plot(lm(log10(cane_yield) ~ Ncombined, data = d), main = "Cane Yield")

#this one (cane_sugar) doesn't fit in any transformation criteria
shapiro.test(d$cane_sugar)
shapiro.test(log10(d$cane_sugar))
shapiro.test(log(d$cane_sugar))
shapiro.test(sqrt(d$cane_sugar))
hist(d$cane_sugar)
plot(lm(cane_sugar ~ Ncombined, data = d), main = "Cane % Sugar")


shapiro.test(d$nitrogen_concentration)
shapiro.test(log10(d$nitrogen_concentration))
shapiro.test(log(d$nitrogen_concentration))
shapiro.test(sqrt(d$nitrogen_concentration))
hist(d$nitrogen_concentration)
plot(lm(nitrogen_concentration ~ Ncombined, data = d), main = "% Nitrogen")

Regression Models

These are throwing an error ... still need to debug.

Total Aboveground Biomass

summary(nlme::lme(fixed = log(yield) ~ Ncombined, random = ~1|ExpID, data = d, na.action = na.omit))

Cane_Yield

summary(nlme::lme(fixed = log(cane_yield) ~ Ncombined, random = ~1|ExpID, data = d, na.action = na.omit))

Sugar Content of Cane

summary(nlme::lme(fixed = cane_sugar ~ Ncombined, random = ~1|ExpID, data = d, na.action = na.omit))

Nitrogen Concentration

summary(nlme::lme(fixed = nitrogen_concentration ~ Ncombined, random = ~1|ExpID, data = d, na.action = na.omit))

ebimodeling/sugarbag documentation built on May 15, 2019, 7:32 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ebimodeling/sugarbag
R version of Sugarbag database of sugarcane traits and yields

In ebimodeling/sugarbag: R version of Sugarbag database of sugarcane traits and yields

Prepare data

Calculate the plot means and then the maximum values of the variables

Meta-analysis of N effects on yield, Sugar, and LAI

Exploratory Data Analysis

Summarize

Check distribution, outliers, odd variables

Look which ExpID had irrigation and non-irrigation

Check basic models (testing normality with shapiro test and transforminng some variables, linear regression analysis (modified by Ursula))

Regression Models

These are throwing an error ... still need to debug.

Total Aboveground Biomass

Cane_Yield

Sugar Content of Cane

Nitrogen Concentration

R Package Documentation

Browse R Packages

We want your feedback!

ebimodeling/sugarbag R version of Sugarbag database of sugarcane traits and yields

In ebimodeling/sugarbag: R version of Sugarbag database of sugarcane traits and yields

Prepare data

Calculate the plot means and then the maximum values of the variables

Meta-analysis of N effects on yield, Sugar, and LAI

Exploratory Data Analysis

Summarize

Check distribution, outliers, odd variables

Look which ExpID had irrigation and non-irrigation

Check basic models (testing normality with shapiro test and transforminng some variables, linear regression analysis (modified by Ursula))

Regression Models

These are throwing an error ... still need to debug.

Total Aboveground Biomass

Cane_Yield

Sugar Content of Cane

Nitrogen Concentration

R Package Documentation

Browse R Packages

We want your feedback!

ebimodeling/sugarbag
R version of Sugarbag database of sugarcane traits and yields