title: "Overall Effect of N" author: "Ursula Ruiz-Vera, Deepak Jaiswal, David LeBauer" date: "Fri Nov 07 16:04:51 2014" output: html_document: default pdf_document: keep_tex: yes number_sections: yes word_document: default


#setwd("C:\\Users\\ruizver1\\Documents\\myprojectEBI\\sugarbag")
library(sugarbag)
library(data.table)
library(ggplot2)
setwd("./")
knitr::opts_chunk$set(warning=FALSE, cache=TRUE)

Prepare data

# Step 1:- Get all the experiment IDs  for whic Harvest, Experiment Design and Planting information is available

expID <-GetExpID_for_N_Combined()
# Step 2 :- Define variable name that we are interested in

varname <- c("DWMST","DWTOTA","LAI",  "NWTOTA", "SWMST","SWTOTA", "Latitude.x", "Longitude.x")

## Create dataframe
id <- expID[1]
Output <- Update_expID(id, varname)
N = length(expID)

# Get N versus Yield data from all the experiments andRBIND them to Output
for (i in 2:N){
  id <- expID[i]
  tmp_output <- Update_expID(id, varname)
  Output <- rbind(Output,tmp_output)
}

Calculate the plot means and then the maximum values of the variables

library(dplyr)

OUT <- data.table(Output)

OUT2 <- OUT[,list(DWMST=mean(DWMST, na.rm = TRUE),DWTOTA=mean(DWTOTA, na.rm = TRUE),LAI=mean(LAI, na.rm = TRUE), 
                  NWTOTA=mean(NWTOTA, na.rm = TRUE),SWMST=mean(SWMST, na.rm = TRUE),SWTOTA=mean(SWTOTA, na.rm = TRUE)), 
            by = c('ExpID', 'Treatment', 'Date', 'Ncombined', 'Cultivar', 'Irrigation')]


ndata <- unique(OUT2[,list(DWMST=max(DWMST, na.rm = TRUE),DWTOTA=max(DWTOTA, na.rm = TRUE), 
                  NWTOTA=max(NWTOTA, na.rm = TRUE),SWMST=max(SWMST, na.rm = TRUE),SWTOTA=max(SWTOTA, na.rm = TRUE)),
            by = c('ExpID','Treatment','Ncombined','Cultivar','Irrigation')])

#from here, I am selecting the dates when I got the maximum values and chosing those days from the firts file OUT to work with the raw data, not the means
ndata2 <- merge(OUT2, ndata, by = c('ExpID', 'Treatment', 'Ncombined', 'Cultivar', 'Irrigation'))

ndata3 <- filter(ndata2, SWMST.x-SWMST.y==0, ExpID!= "13", ExpID!="36", ExpID!= "14", ExpID!="7")

ndata4 <- select(ndata3, ExpID, Treatment, Ncombined, Cultivar, Irrigation, Date)

ndata5 <- merge(OUT,ndata4, by = c('ExpID', 'Treatment', 'Date', 'Ncombined', 'Cultivar', 'Irrigation'))

#d is the document with the data I want
d <- ndata5[!is.na(Ncombined), 
           list(ExpID, Treatment, Ncombined = as.numeric(Ncombined), Cultivar, Irrigation, DWTOTA, SWTOTA, NWTOTA, DWMST, SWMST, 
          yield = DWTOTA, sugar = SWTOTA, cane_yield = DWMST, cane_sugar = SWMST/DWMST, nitrogen_concentration = NWTOTA/DWTOTA)]

## Clean -Inf http://stackoverflow.com/a/12188551/199217
invisible(lapply(names(d),function(.name) set(d, which(is.infinite(d[[.name]])), j = .name,value =NA)))

Meta-analysis of N effects on yield, Sugar, and LAI

Exploratory Data Analysis

Our question is: Is sugarcane yield and sugar content limited by N and/or water availablility? Our Model: y = Nitrogen * Irrigation + epsilon where y is yield or sugar content (of total aboveground biomass, of cane)

Summarize

library(pander)
#Records where Cane Sugar Content are too low, it is because in some data DWMST is NA and it is giving cane_sugar=0
pander(d[cane_sugar < 0.1 & cane_sugar > 0])
d[cane_sugar < 0.1,`:=` (cane_sugar = NA)]

pander(data.frame(n = nrow(d)))
pander(summary(d[, list(Ncombined, yield, sugar, cane_yield, cane_sugar, nitrogen_concentration)], digits = 3))
pander(summary(lm(yield ~ Ncombined*Irrigation, data = d), digits = 3))
pander(summary(lm(cane_yield ~ Ncombined*Irrigation, data = d), digits = 3))
pander(summary(lm(cane_sugar ~ Ncombined*Irrigation, data = d), digits = 3))

Check distribution, outliers, odd variables

#dev.off helps me not to have problems running the ggplot
dev.off()
ggplot(data = d) + geom_histogram(aes(yield))

ggplot(data = d) + geom_histogram(aes(sugar))

ggplot(data = d) + geom_histogram(aes(cane_yield))

ggplot(data = d) + geom_histogram(aes(cane_sugar))

Look which ExpID had irrigation and non-irrigation

data(variables)
if(exists("variables"))pander(variables[variables$VariableName %in% colnames(ndata),])

ggplot(data = d) + geom_point(aes(Ncombined, yield, color = as.logical(Irrigation))) + 
  facet_wrap(~ExpID) + ylim(range(0, max(pretty(ndata$DWTOTA))))

#I won't use the variable sugar because there is few data
ggplot(data = d) + geom_point(aes(Ncombined, sugar, color = as.logical(Irrigation))) + 
  facet_wrap(~ExpID) + ylim(range(0, 4000))

ggplot(data = d) + geom_point(aes(Ncombined, cane_yield, color = as.logical(Irrigation))) + 
  facet_wrap(~ExpID) + ylim(range(0, 8000))

ggplot(data = d) + geom_point(aes(Ncombined, cane_sugar, color = as.logical(Irrigation))) + 
  facet_wrap(~ExpID) + ylim(range(0, 0.6))

ggplot(data = d) + geom_point(aes(Ncombined, nitrogen_concentration, color = as.logical(Irrigation))) + 
  facet_wrap(~ExpID) + ylim(range(0, 0.008))
##graphic with linear regression (by Ursula)

#yield
ggplot(data = d, aes(Ncombined, yield)) + 
geom_point(aes(Ncombined, yield ), alpha = 0.4) + 
stat_smooth(method=lm, level=0.9) +
ylim(0,9000) + 
xlab("amount of N (Kg/ha)") + ylab("Dry Weight of Above-ground Biomass (g/m2)")

ggplot(data = d, aes(Ncombined, log(yield))) + 
  geom_point(aes(Ncombined, log(yield) ), alpha = 0.4) + 
  stat_smooth(method=lm, level=0.9) +
  ylim(6.5,9.5) + 
  xlab("amount of N (Kg/ha)") + ylab("Log of the dry Weight of Above-ground Biomass (g/m2)") 

#cane yield
ggplot(data = d, aes(Ncombined, cane_yield)) + 
  geom_point(aes(Ncombined, cane_yield), alpha = 0.4) + 
  stat_smooth(method=lm, level=0.9) +
  ylim(0,7000) +
  xlab("amount of N (Kg/ha)") + ylab("Dry weight of the Cane Biomass (g/m2)")

ggplot(data = d, aes(Ncombined, log(cane_yield) )) + 
  geom_point(aes(Ncombined, log(cane_yield )), alpha = 0.4) + 
  stat_smooth(method=lm, level=0.9) +
  ylim(6.5,9.5) +
    xlab("amount of N (Kg/ha)") + ylab("Dry weight of the Cane Biomass (g/m2)")

#sugar in cane
ggplot(data = d, aes(Ncombined, cane_sugar)) + 
  geom_point(aes(Ncombined, cane_sugar), alpha = 0.4) + 
  stat_smooth(method=lm, level=0.9) + ylim(0.35,0.6) + 
  xlab("amount of N (Kg/ha)") + 
  ylab("Sucrose/Dry Weight, of the Cane Biomass")

#nitrogen in biomass
ggplot(data = d, aes(Ncombined,nitrogen_concentration) ) + 
  geom_point(aes(Ncombined, nitrogen_concentration), alpha = 0.4) + 
  stat_smooth(method=lm, level=0.9) + ylim(0,0.006) + 
  xlab("amount of N (Kg/ha)") + ylab("Nitrogen/Dry Weight, of the Above-ground Biomass")
####

Check basic models (testing normality with shapiro test and transforminng some variables, linear regression analysis (modified by Ursula))

#test
par(mfrow=c(2,2))

library(nlme)
install.packages("lme4")

hist(d$Ncombined)

shapiro.test(d$yield)
shapiro.test(log10(d$yield))
shapiro.test(log(d$yield))
shapiro.test(sqrt(d$yield))

d[,plot(Ncombined, log10(yield))]
plot(lm(log10(yield) ~ Ncombined, data = d), main = "Yield")
shapiro.test(d$cane_yield)
shapiro.test(log10(d$cane_yield))
shapiro.test(log(d$cane_yield))
shapiro.test(sqrt(d$cane_yield))
hist(d$cane_yield)
hist(log10(d$cane_yield))
plot(lm(log10(cane_yield) ~ Ncombined, data = d), main = "Cane Yield")

#this one (cane_sugar) doesn't fit in any transformation criteria
shapiro.test(d$cane_sugar)
shapiro.test(log10(d$cane_sugar))
shapiro.test(log(d$cane_sugar))
shapiro.test(sqrt(d$cane_sugar))
hist(d$cane_sugar)
plot(lm(cane_sugar ~ Ncombined, data = d), main = "Cane % Sugar")


shapiro.test(d$nitrogen_concentration)
shapiro.test(log10(d$nitrogen_concentration))
shapiro.test(log(d$nitrogen_concentration))
shapiro.test(sqrt(d$nitrogen_concentration))
hist(d$nitrogen_concentration)
plot(lm(nitrogen_concentration ~ Ncombined, data = d), main = "% Nitrogen")

Regression Models

These are throwing an error ... still need to debug.

Total Aboveground Biomass

summary(nlme::lme(fixed = log(yield) ~ Ncombined, random = ~1|ExpID, data = d, na.action = na.omit))

Cane_Yield

summary(nlme::lme(fixed = log(cane_yield) ~ Ncombined, random = ~1|ExpID, data = d, na.action = na.omit))

Sugar Content of Cane

summary(nlme::lme(fixed = cane_sugar ~ Ncombined, random = ~1|ExpID, data = d, na.action = na.omit))

Nitrogen Concentration

summary(nlme::lme(fixed = nitrogen_concentration ~ Ncombined, random = ~1|ExpID, data = d, na.action = na.omit))


ebimodeling/sugarbag documentation built on May 15, 2019, 7:32 p.m.