R/yieldPrev.R

Defines functions supVecMac scenarioAnalysis bartolomeoMod valiTrend loadYieldSession saveYieldSession responseYield randModel modSel cutTrend breakTrends sewTrends checkTrends configure

Documented in bartolomeoMod breakTrends checkTrends configure cutTrend loadYieldSession modSel responseYield saveYieldSession sewTrends valiTrend

	if(any(ls() == "yieldPrev")){}else{
	yieldPrev <-new.env()
	yieldPrev$.conflicts.OK<-c()
}
###CONFIGURATION
configure<-function(depth="base"){
	if(depth=="base"){
		cat(c("Do you have a single file database or official and simulation split in two different files? \n 1.single file \n 2. double files \n (answer by numer)"),fill=TRUE)
		numFiles<-scan(,nmax=1)
		while(numFiles != 1 & numFiles != 2){cat(c(" 1 and 2 are the only answers supported  \n 1.single file \n 2. double files \n "))
			numFiles<-scan(,nmax=1)
		}
		if(numFiles == 2){
			#find out data-files
			#cat("If you created your csv databases using MS Office, write <1> here, else <0>", fill=TRUE)
			#msoffice<-scan(,nmax=1)
			cat("Provide OFFICIAL yield database",fill=TRUE)
			offiData<-file.choose()
			write.table(gsub(gsub(scan(file=offiData,what="text",sep="\n"),pattern='"',replacement='',fixed=TRUE),pattern=";",replacement=","),file="adjOffi.csv",quote=FALSE,row.names=FALSE,col.names=FALSE)

		#	if(msoffice == 1) eurostat<-read.table(file.choose(), header=T, sep=";") else eurostat<-read.csv(file.choose())


			cat("Provide SIMULATE yield database",fill=TRUE)
			simuData<-file.choose()
			if(basename(simuData)=="PredictoIndicis.txt"){
				predInd<-read.csv(simuData)
				predInd$INDICATOR_VALUE<-as.numeric(as.character(predInd$INDICATOR_VALUE))
				#predInd$STAT_CROP_NO[which(predInd$STAT_CROP_NO==8)]<-"indica"
				#predInd$STAT_CROP_NO[which(predInd$STAT_CROP_NO==9)]<-"japonica"
				predIndL<-spread(predInd,INDICATOR_CODE,INDICATOR_VALUE)
				write.csv(predIndL,file="adjSimu.csv",row.names=FALSE)
			}else{
				write.table(gsub(gsub(scan(file=simuData,what="text",sep="\n"),pattern='"',replacement='',fixed=TRUE),pattern=";",replacement=","),file="adjSimu.csv",quote=FALSE,row.names=FALSE,col.names=FALSE)
			}
		}
		if(numFiles == 1){
			cat(c("Point out your database \n"))
			allData<-file.choose()
			write.table(gsub(gsub(scan(file=allData,what="text",sep="\n"),pattern='"',replacement='',fixed=TRUE),pattern=";",replacement=","),file="adjDtb.csv",quote=FALSE,row.names=FALSE,col.names=FALSE)
			longData<-read.csv("adjDtb.csv")

			bothData<-spread(longData,INDICATOR_CODE,INDICATOR_VALUE)
			write.csv(bothData[,c(which(names(bothData)=="YEAR"),which(names(bothData)=="official.yield"|names(bothData)=="Officiall yield"|names(bothData)=="OFFICIAL_YIELD"|names(bothData)=="Official.yield"|names(bothData)=="official_yield"|names(bothData)=="Official_yield"))],"adjOffi.csv",quote=FALSE,row.names=FALSE)
			write.csv(bothData[,c(-which(names(bothData)=="official.yield"|names(bothData)=="Officiall yield"|names(bothData)=="OFFICIAL_YIELD"|names(bothData)=="Official.yield"|names(bothData)=="official_yield"|names(bothData)=="Official_yield"))],"adjSimu.csv",quote=FALSE,row.names=FALSE)
		}
	}
	eurostat<-read.csv("adjOffi.csv")
	prev <- read.csv("adjSimu.csv")
	#here ends the importing data issue, from here they are only structured


actualYield<-unique(eurostat)
relatedModel<-unique(prev)

		if(any(names(actualYield)=="STAT_CROP_NO")&any(names(relatedModel)=="CROP_NO")){names(actualYield)[names(actualYield)=="STAT_CROP_NO"]<-"CROP_NO"}else{
		if(any(names(relatedModel)=="STAT_CROP_NO")&any(names(actualYield)=="CROP_NO")){names(relatedModel)[names(relatedModel)=="STAT_CROP_NO"]<-"CROP_NO"}}

		#standarizing Official Yield Names...trying to, at least
		if(any(names(actualYield)=="official.yield")){names(actualYield)[names(actualYield)=="official.yield"]<-"OFFICIAL_YIELD"}
		if(any(names(actualYield)=="Official yield")){names(actualYield)[names(actualYield)=="Official yield"]<-"OFFICIAL_YIELD"}
		if(any(names(actualYield)=="Official.yield")){names(actualYield)[names(actualYield)=="Official.yield"]<-"OFFICIAL_YIELD"}
		if(any(names(actualYield)=="official_yield")){names(actualYield)[names(actualYield)=="official_yield"]<-"OFFICIAL_YIELD"}
		if(any(names(actualYield)=="Official _yield")){names(actualYield)[names(actualYield)=="Official_yield"]<-"OFFICIAL_YIELD"}



	if(any(names(actualYield)=="CROP_NO")){
		#choice of crop parameters
		cat(c("OFFICIAL yield data contains information for the following CROPs:\n",unique(actualYield$CROP_NO),"\n Choose one:",fill=TRUE))
		cropO<-scan(,nmax=1)
		while(any(unique(actualYield$CROP_NO) == cropO) == FALSE){cat("point an existing one \n ")
			cropO<-scan(,nmax=1)}
			actualYield<-subset(actualYield,actualYield$CROP_NO == cropO)
			}
	if(any(names(relatedModel)=="CROP_NO")){
		cat(c("SIMULATED yield data contains information for the following CROPs:\n",unique(relatedModel$CROP_NO),"\n Choose one:",fill=TRUE))
		cropS<-scan(,nmax=1)
		while(any(unique(relatedModel$CROP_NO) == cropS) == FALSE){cat("point an existing one \n ")
		cropS<-scan(,nmax=1)}
		yieldPrev$saveCrop<-cropS
		relatedModel<-subset(relatedModel, relatedModel$CROP_NO== cropS)
	}else{cat(c("It seems you have one crop only: no data named CROP_NO found \n"))}

	if(any(names(actualYield)=="NUTS_CODE")){
		#choice of country
		actualYield$NUTS_CODE<-as.factor(actualYield$NUTS_CODE)
		cat(c("The following countries are provided in the DataBases:\n","OFFICIAL:",levels(actualYield$NUTS_CODE),"(case sensitive) \n"),fill=TRUE)
		cat(" OFFICIAL COUNTRY:")
		countryO<-scan(,what="text",nmax=1)
		while(any(levels(actualYield$NUTS_CODE) == countryO) == FALSE){cat("point an existing one \n ")
		countryO<-scan(,what="text",nmax=1)}
		actualYield<-subset(actualYield,actualYield$NUTS_CODE == countryO)
	}
	if(any(names(prev)=="NUTS_CODE")){
		prev$NUTS_CODE<-as.factor(prev$NUTS_CODE)
		cat(c("The following countries are provided in the DataBases: \n SIMULATION:",levels(prev$NUTS_CODE)," \n "))
		countryS<-scan(,what="text",nmax=1)
		while(any(levels(prev$NUTS_CODE) == countryS) == FALSE){cat("point an existing one \n ")
		countryS<-scan(,what="text",nmax=1)}
		yieldPrev$saveCountry<-countryS
		relatedModel<-subset(relatedModel, relatedModel$NUTS_CODE == countryS)
	}else{cat(c("It seems you have one nation only: no data named NUTS_CODE found \n"))}
		#subsetting conforming the configuration set above
		actualYield<-actualYield[,c(which(names(actualYield)=="YEAR"),which(names(actualYield)=="OFFICIAL_YIELD"))]
		yieldPrev$actualYield<-actualYield[order(actualYield$YEAR),]

	#reading datas for suitable informations
	currentYear<- max(unique(relatedModel$YEAR))
	yieldPrev$currentYear <- currentYear
	if(any(names(relatedModel)=="DECADE")){
		currentDecade<- max(subset(relatedModel,relatedModel$YEAR==currentYear)$DECADE)
		cat(c("It seems forecasting the year",currentYear,"with data till the ",currentDecade,"th decade"),fill=TRUE)
		cat(c("Do you want to change Decade assumption? \n "))
		changeYD<-scan(,what="text",nmax=1)
		while(!any(c(length(changeYD)==0,changeYD != "y", changeYD != "n"))){
			cat("answer y or n \n")
			changeYD<-scan(,what="text",nmax=1)
		}
		if(changeYD == "n"){
			yieldPrev$relatedModel<-subset(relatedModel,relatedModel$DECADE == currentDecade)[,c(-which(names(relatedModel)=="CROP_NO"),-which(names(relatedModel)=="DECADE"),-which(names(relatedModel)=="NUTS_CODE"))]
		}
		if(changeYD == "y"){
			cat(c("Digit desired Decade"))
			#suggest: loop checking that the desired decade is lower than the last one...
			currentDecade<-scan(,nmax=1)
			cat(c("Digit desired Year"))
			yieldPrev$currentYear <- scan(,nmax=1)
			yieldPrev$relatedModel<-subset(relatedModel,relatedModel$DECADE == currentDecade)[,c(-which(names(relatedModel)=="CROP_NO"),-which(names(relatedModel)=="DECADE"),-which(names(relatedModel)=="NUTS_CODE"))]
		}
	}
	yieldPrev$relatedModel<-yieldPrev$relatedModel[sapply(yieldPrev$relatedModel, function(x) !any(is.na(x)))]
}
		#save information for saveYieldSession()

####library(tidyr)
####library(spikeslab)

###working on official trends
####library(tseries)
####library(forecast)
####library(ggplot2)
#providiing function for check actual existance of trend
checkTrends<-function(){
	attach(yieldPrev)
	if(any(names(yieldPrev) == "flatYield")){}else{
		flatYield<-actualYield
		yieldPrev$flatYield<-actualYield }
	cat(c("\n Official yields have a " ,round((adf.test(flatYield$OFFICIAL_YIELD)$p.value)*100,2),"% of chances to have a trend."),fill=TRUE)
	cat(c(" \n Look the chart for the visual assessment. \n Plotted: \n BLACK:actual data \n BROWN:linear model \n RED:LOcal regrESSion \n ORANGE:spline regression"),fill=TRUE)
	offiPlot<-ggplot(flatYield,aes(x=YEAR, y=OFFICIAL_YIELD,group=1))+geom_point(color="black")+geom_line(color="black")+geom_smooth(method="lm",color="brown",se=FALSE)+geom_smooth(method="loess",color="red",se=FALSE)+geom_smooth(method="lm",formula= y~splines::bs(x,3),color="orange",se=FALSE)
	plot(offiPlot)
	cat(c(" \n Do you see a trend in the data?\n	(y / n) \n "),fill=TRUE)
	yieldPrev$flattyn<-scan(,what="text",nmax=1)
	while(!any(c(length(yieldPrev$flattyn)==0,yieldPrev$flattyn != "y" , yieldPrev$flattyn != "n"))){
		cat("answer y or n \n ")
		yieldPrev$flattyn<-scan(,what="text",nmax=1)}
#	cat(c("The time serie is ",(unique(max(flatYield$YEAR))-unique(min(flatYield$YEAR))), "years long, since",unique(min(flatYield$YEAR)),"to",unique(max(flatYield$YEAR))),fill=TRUE)
	#checking if there are troubles in the official data series.
	if(length(c(setdiff(seq(min(flatYield$YEAR),max(flatYield$YEAR)),flatYield$YEAR),flatYield$YEAR[duplicated(flatYield$YEAR)])) == 0) cat("") else cat(c("By the way there are \n MISSING:",setdiff(seq(min(flatYield$YEAR),max(flatYield$YEAR)),flatYield$YEAR)," \n REPLICATED: ",flatYield$YEAR[duplicated(flatYield$YEAR)]," \n"))
	detach(yieldPrev)
}


#working on official data's trend
sewTrends<-function(inizio,fine){
	tempLimit<-data.frame(begin=inizio,finish=fine)
	#attach(yieldPrev)
	yieldPrev$flatOff<- merge(yieldPrev$flatYield,yieldPrev$relatedModel,by="YEAR")
	#yieldPrev$flatOff<-flatOff
	normalizingTrend<-function(campo){ #campo is numeric!
		yieldPrev$flatOff[,campo]<-(yieldPrev$flatOff[,campo]/mean(yieldPrev$flatOff[,campo]))*100
	}
	lapply(X=seq(2,length(yieldPrev$flatOff)),FUN=normalizingTrend)

	#removing NA values, they mess up the lm....
	#flatOff<-yieldPrev$flatOff
	yieldPrev$flatOff<-yieldPrev$flatOff[sapply(yieldPrev$flatOff,function(flatOff)!any(is.na(flatOff)))]
	#flatOff<-yieldPrev$flatOff
	trendAN<-new.env()
	trendAN$normPlot<-ggplot(yieldPrev$flatOff)+geom_line(aes(x=YEAR,y=OFFICIAL_YIELD,group=1),color="red")+geom_smooth(method="loess",color="red",aes(x=YEAR,y=OFFICIAL_YIELD,group=1),se=FALSE)
	trendPlot<-function(yvar){

		trendAN$normPlot<-trendAN$normPlot + geom_smooth(data=yieldPrev$flatOff,method="loess", color="cyan", aes_(x=~YEAR, y=as.name(yvar),group=1),se=FALSE)

	}
	lapply(names(yieldPrev$flatOff[,c(-1)]),FUN=trendPlot)
	trendAN$PlotNormA<-trendAN$normPlot+geom_line(aes(x=YEAR,y=OFFICIAL_YIELD,group=1),color="red",size=1.5)+geom_smooth(method="loess",color="orange",aes(x=YEAR,y=OFFICIAL_YIELD),se=FALSE,size=1.5)+labs(x="YEARS", y="TREND")+ geom_rect(data=tempLimit, aes(xmin=begin, xmax=finish, ymin=-Inf, ymax=+Inf), fill='pink', alpha=0.5)
	#detach(yieldPrev)
	plot(trendAN$PlotNormA)

	#yieldPrev$friendShip<-data.frame(trendCoef=lm(formula=OFFICIAL_YIELD ~ YEAR,data=flatOff)$coefficients[2])
	#rownames(yieldPrev$friendShip[1,])<-"OFFICIAL_YIELD"
	#flatOff1<-yieldPrev$flatOff
	yieldPrev$flatOff<-subset(yieldPrev$flatOff,yieldPrev$flatOff$YEAR >= inizio & yieldPrev$flatOff$YEAR <= fine)
	#yieldPrev$flatOff<-flatOff2
	#flatOff<-flatOff2
	yieldPrev$friendShip<-data.frame(param=as.character(names(yieldPrev$flatOff)[(names(yieldPrev$flatOff) == "OFFICIAL_YIELD")]),trendCoef=as.numeric(lm(formula=OFFICIAL_YIELD ~ YEAR,data=yieldPrev$flatOff)$coefficients[2]))
	plot(trendAN$PlotNormA+stat_smooth(data=yieldPrev$flatOff,method="lm",color="black",aes(x=YEAR,y=OFFICIAL_YIELD,group=1),fullrange=FALSE,se=FALSE,size=1))
	flatOff1<-merge(yieldPrev$flatYield,yieldPrev$relatedModel,by="YEAR")
	yieldPrev$flatOff<-subset(flatOff1,flatOff1$YEAR >= inizio & flatOff1$YEAR <= fine)
	#yieldPrev$flatOff<-flatOff2
	#flatOff<-flatOff2
	mayTrend<-names(yieldPrev$flatOff)[(names(yieldPrev$flatOff)!= "YEAR" &  names(yieldPrev$flatOff)!= "OFFICIAL_YIELD")]
	friendTest<-function(mate){
		allIn<-yieldPrev$flatOff
		#formulFriend<-as.formula(paste(as.name(mate)," ~ YEAR",sep=""))
		formulFriend<-as.formula(paste(as.name(mate)," ~ seq(1,length(yieldPrev$flatOff[,1]))",sep=""))
		linMod<-lm(formula=formulFriend,data=allIn)
		yieldPrev$friendShip <- rbind(yieldPrev$friendShip,data.frame(param=paste(c(as.character(mate)),sep=""),trendCoef=as.numeric(linMod$coefficients[2])),deparse.level=1)
		#rownames(yieldPrev$friendShip[length(yieldPrev$friendShip[,1]),])<-as.name(mate)
		#print(c(paste(c(as.character(mate)),sep=""),linMod$coefficients[2]))
	}
	sapply(X=mayTrend,FUN=friendTest,USE.NAMES=TRUE)
	trendShip<-yieldPrev$friendShip
	yieldPrev$YieldTrend<-trendShip[1,2]
	#yieldPrev$yieldTrend<-YieldTrend
	cat(c("The found trend for Official_Yield is ",round((trendShip[1,2]),2),". \n The following are the similar trends available between the predictors: \n ->Absolute values \n -> Sorted by difference with Yield trend \n \n "),fill=TRUE)
	trendShip$trendDiff<-abs(trendShip[,2]-trendShip[1,2])
	trendMates<-trendShip[c(-1),]
	trendMates<-(trendMates[order(trendMates$trendDiff),])
	trendMates$diff_perC <- round(abs(trendMates$trendDiff)/abs(yieldPrev$YieldTrend),2)
	trendMates$ID <- seq(1,length(trendMates[,1]))
	print(trendMates)

	#continue to cut?
	cat(c("\n Do you want to continue removing the trend in official yields"),fill=TRUE)
	continueToCut<-scan(,what="text",nmax=1)
	while(!any(c(length(continueToCut)==0,continueToCut != "y" , continueToCut != "n"))){
		cat("answer y or n \n")
		continueToCut<-scan(,what="text",nmax=1)}
	if(continueToCut == "y"){
		#allow some sewing with Mates (in case which)
		cat(c("\n Does any of the predictors explain some of the Official's Trend? \n If yes, point which one(s) by ID (multiple answers allowed) \n If none of them does, return blank: \n"),fill=TRUE)
		mateList<-scan(,nmax=length(trendMates[,1]))
		if(length(mateList >= 1)){
			yieldPrev$safeTrend <- mean(abs(trendMates$trendCoef[c(mateList)]))
		}
		cutTrend(inizio,fine)
	}
}

breakTrends<-function(){ #this doesn't work
	attach(yieldPrev)
	flatPlot<-ggplot(yieldPrev$flatYield)+geom_point(color="black",aes(x=YEAR, y=OFFICIAL_YIELD,group=1))+geom_line(color="black",aes(x=YEAR, y=OFFICIAL_YIELD,group=1))+geom_smooth(method="lm",color="brown",se=FALSE,aes(x=YEAR, y=OFFICIAL_YIELD,group=1))+geom_smooth(method="loess",color="red",se=FALSE,aes(x=YEAR, y=OFFICIAL_YIELD,group=1))+geom_smooth(method="lm",formula= y~splines::bs(x,3),color="orange",se=FALSE,aes(x=YEAR, y=OFFICIAL_YIELD,group=1))
	if(any(names(yieldPrev) == "breakPoint")) flatPlot<-flatPlot + geom_rect(data=yieldPrev$breakPoint, aes(xmin=begin, xmax=finish, ymin=-Inf, ymax=+Inf), fill='yellow', alpha=0.3)
	plot(flatPlot)
	cat(c(" \n Time series starts in",unique(min(flatYield$YEAR)),"and ends in ",unique(max(flatYield$YEAR))),fill=TRUE)
		cat(c(" \n Point the trend's edges by year\n"),fill=TRUE)
		trendEdge<-scan(,nmax=2)
		tempLimit<-data.frame(begin=min(trendEdge),finish=max(trendEdge))
		#paint them and ask confirm.... this thing about asking confirm  is messy...
		cutPlot<-flatPlot + geom_rect(data=tempLimit, aes(xmin=begin, xmax=finish, ymin=-Inf, ymax=+Inf), fill='pink', alpha=0.5)
		#if(any(names(yieldPrev) == "breakPoint")) cutPlot<-cutPlotA + geom_rect(data=yieldPrev$breakPoint, aes(xmin=begin, xmax=finish, ymin=-Inf, ymax=+Inf), fill='yellow', alpha=0.3) else cutPlot <- cutPlotA
		plot(cutPlot)

	#confirm
	cat(c("\n Are the drawn lapse correct? \n (y/n)"),fill=TRUE)
	continCut <- scan(,what="text",nmax=1)
	while(!any(c(length(continCut)==0,continCut != "y" , continCut != "n"))){
		cat("answer y or n")
	continCut <- scan(,what="text",nmax=1)}
	if(continCut == "y"){
	sewTrends(min(trendEdge),max(trendEdge))}
	detach(yieldPrev)

}
####library(MASS)
cutTrend<-function(inizio,fine){
	if(any(names(yieldPrev) == "due2trend")){} else {
		yieldPrev$due2trend<-data.frame(YEAR=yieldPrev$actualYield$YEAR,trended=rep(0,length=length(yieldPrev$actualYield$YEAR)))
	}
	notSoFlat <-yieldPrev$flatYield
	#cutting trend from inizio to fine
	preflat<-subset(notSoFlat,notSoFlat$YEAR >= inizio & notSoFlat$YEAR <= fine)
	flatLin<-lm(OFFICIAL_YIELD~YEAR,data=preflat)
	#developing the flat "officials"
	cutEnv<-new.env()
    cutEnv$modello<-flatLin
    cutEnv$flatting<-preflat
    if(any(names(yieldPrev)=="safeTrend")){cutEnv$trendCorr<-(flatLin$coefficients[2]-yieldPrev$safeTrend)} else{ cutEnv$trendCorr<-flatLin$coefficients[2]}
    print(cutEnv$trendCorr)
    smootherer<-function(num){
		#attach(cutEnv)
		model<-cutEnv$modello
		#flat<-(model$model[num,1])-(model$model[num,2]-model$model[1,2])* cutEnv$trendCorr
		yieldPrev$due2trend[which(yieldPrev$due2trend$YEAR == model$model[num,2]),2]<-yieldPrev$due2trend[which(yieldPrev$due2trend$YEAR == model$model[num,2]),2]+(model$model[num,2]-model$model[1,2])* cutEnv$trendCorr
		#cutEnv$flatting[num,2]<-flat
		#print(flatting[num,])
		#detach(cutEnv)
	}
	lapply(X=seq(1,length(preflat$OFFICIAL_YIELD)),FUN=smootherer)
	#preflat<-cutEnv$flatting
	if(any(names(yieldPrev) == "breakPoint")) yieldPrev$breakPoint<-rbind(yieldPrev$breakPoint,c(inizio,fine,as.numeric(flatLin$coefficients[2]))) else yieldPrev$breakPoint<- data.frame(begin=inizio,finish=fine,trend=as.numeric(flatLin$coefficients[2]))

	trendInt<-function(anno){
		model<-cutEnv$modello
		interTrend<-(model$model[length(model$model[,2]),2]-model$model[1,2])* cutEnv$trendCorr
		yieldPrev$due2trend[which(yieldPrev$due2trend$YEAR == anno),2]<- yieldPrev$due2trend[which(yieldPrev$due2trend$YEAR == anno),2] + interTrend
	}

	if(length(subset(notSoFlat$YEAR,notSoFlat$YEAR > fine)) > 0){
		lapply(X=seq((fine+1),max(notSoFlat$YEAR)),FUN=trendInt)
		}
	#now we have to grant no more safeTrend will influence further trends!
	#yieldPrev$safeTrend<- NULL
	rm(list=c("safeTrend"),envir=yieldPrev)

	yieldPrev$flatYield$OFFICIAL_YIELD<-yieldPrev$actualYield$OFFICIAL_YIELD - yieldPrev$due2trend$trended
}


####library(leaps)
####library(HH)
####library(relaimpo)
modSel <- function(standardModel,rcrit){
	tableXregression <-merge(yieldPrev$flatYield , yieldPrev$relatedModel , by="YEAR")
	#clean this table, 0 columns are going to mess it up
	coluClean<-function(cola){
		if(min(tableXregression[,cola]) == max(tableXregression[,cola])){return(cola)}
	}
	dirtyCol<-lapply(X=seq(1,length(names(tableXregression))),FUN=coluClean)
	if(!is.null(unlist(dirtyCol))){tableXregression<-tableXregression[,-unlist(dirtyCol)]}
	if(missing(standardModel)){
		cat("Are you looking for a standard additive model? \n a models accounting for combined predictors. \n Which do you prefer? \n ")
 		cat(" 1. standard \n 2. enhanced \n ")
 		standardModel<-scan(,what="text",nmax=1)
		while(standardModel != "1" & standardModel != "2" & standardModel != "standard" & standardModel != "enhanced" & length(standardModel)==0 ){
			cat(" 1. standard \n 2. enhanced \n ")
			standardModel<-scan(,what="text",nmax=1)
		}
	}
	if(standardModel == "1" | standardModel == "standard"){
		allSign <- regsubsets(OFFICIAL_YIELD~.,data=tableXregression[,c(-which(names(tableXregression)=="YEAR"))],nbest=2,method="exhaustive",nvmax=4, really.big=TRUE)}
	if(standardModel == "2" | standardModel == "enhanced"){
		allSign <- regsubsets(OFFICIAL_YIELD~.^2+.,data=tableXregression[,c(-which(names(tableXregression)=="YEAR"))],nbest=2,nvmax=4, really.big=TRUE)}#,method="exhaustive"
	#renaming predictors with numbers
	summaSign<-summaryHH(allSign,names=seq(1,length(allSign$xnames)),statistics="adjr2")
	if(missing(rcrit)){plot(summaSign,col="green",cex=0.8)}
	#plot(summaryHH(allSign,scale="r2"))
#calc.relimp(yieldPrev$regrSW,type="car")
	print(summaSign)
	#print(summaryHH(allSign,abbrev=3,statistics="adjr2"))
	cat("\n Note: one of the accounted parameter is (Intercept) \n Select a model")
	if(missing(rcrit)){
	modId<-scan(,nmax=1)}else{modId<-which(summaSign$bic == min(summaSign$bic))[1]}
	yieldPrev$model_formula<-c("OFFICIAL_YIELD ~ ")
	compleFormula<-function(parametro){
		yieldPrev$model_formula<- paste(yieldPrev$model_formula, names(coef(allSign,id= modId))[parametro],if(parametro == length(names(coef(allSign,id= modId)))) sep=" " else sep= " +")
	}
	#print(as.formula(yieldPrev$model_formula))
	lapply(X=seq(2,length(names(coef(allSign,id= modId)))),FUN=compleFormula)
	regrSW<-lm(as.formula(yieldPrev$model_formula),data=tableXregression)
	relatedModel <- yieldPrev$relatedModel
	expYield<-predict(regrSW,newdata=subset(relatedModel, relatedModel$YEAR  == yieldPrev$currentYear),se.fit=TRUE,type="response",level=0.95,interval="prediction")
	yieldPrev$expYield <- expYield
	yieldPrev$modelLM<-regrSW
	yieldPrev$tableXregression<-tableXregression
	#cross validating removing each year once
	attach(yieldPrev)
	validC<-CV(yieldPrev$modelLM)
#	validC<-cv.lm(data=yieldPrev$tableXregression, yieldPrev$modelLM, m=((length(unique(yieldPrev$tableXregression$YEAR))-1)),printit=FALSE,plotit=FALSE)
	detach(yieldPrev)
	yieldPrev$CVmsRes<-c(validC[1],validC[5])
	#yieldPrev$CVmsRes<-attributes(validC)$ms
	cat(c("SOME INFOs ABOUT THIS MODEL:"),fill=T)
	try(if(standardModel == 1){
		genizi<-as.matrix(calc.relimp(yieldPrev$modelLM,type="genizi")$genizi)
		colnames(genizi)<-as.list("R2")
			cat(c("\n Decomposition of R2 accordingly to (Genizi,1993): \n"),fill=TRUE)
		print(genizi)
		},silent=T)
		cat(c("\n Regression coefficients:\n"),fill=T)
		print(yieldPrev$modelLM$coefficients)
}
####library(DAAG)


randModel<-function(){
	#experimenting Bayesian variable selection: "spike and slab"
	yieldPrev$modelBay_formula<-c("OFFICIAL_YIELD~ ")
	puntone<-spikeslab(OFFICIAL_YIELD~ . ,data=yieldPrev$tableXregression,max.var=4,fast=F,n.iter1=1000,n.iter2=1000,mse=T)
	lastra<-predict(puntone,newdata=subset(yieldPrev$relatedModel, yieldPrev$relatedModel$YEAR  == yieldPrev$currentYear))
	compleBayFormula<-function(parametro){
		yieldPrev$modelBay_formula<- paste(yieldPrev$modelBay_formula, puntone$names[puntone$gnet.obj.vars][parametro],if(parametro == length(puntone$gnet.obj.vars)) sep=" " else sep= " +")
	}
	#print(as.formula(yieldPrev$model_formula))
	lapply(X=seq(1,length(puntone$gnet.obj.vars)),FUN=compleBayFormula)
	regrGNET<-lm(as.formula(yieldPrev$modelBay_formula),data=yieldPrev$tableXregression)

	expBAYield<-predict(regrGNET,newdata=subset(yieldPrev$relatedModel, yieldPrev$relatedModel$YEAR  == yieldPrev$currentYear),se.fit=TRUE,type="response",level=0.95,interval="prediction")
	#cross validating removing each year once
	validC<-c(CV(regrGNET)[c(1,5)])
	#yieldPrev$CVmsRes<-c(validC[1],validC[5])

	cat(c("Spike&Slam suggests as model:\n 	",yieldPrev$modelBay_formula,"\n which fits adopting: \n	"))
	print(regrGNET$coefficients)
	cat(c("\n and obtains:\n"))
	print(validC)
	cat(c("\n forecasting ",round(expBAYield$fit[1],2) ,"+/-",round(expBAYield$fit[1]-expBAYield$fit[2],2),".\n"))
}


####library(bartMachine)
####library(pls)
responseYield<-function(){
	expYield <- yieldPrev$expYield
	knoTime<-yieldPrev$breakPoint
	if(max(knoTime$finish)== (yieldPrev$currentYear -1)){
		trendMissing<-mean(knoTime$trend[which(knoTime$finish == max(knoTime$finish))])+yieldPrev$due2trend$trended[which(yieldPrev$due2trend$YEAR == (yieldPrev$currentYear -1) )]} else {trendMissing <- yieldPrev$due2trend$trended[which(yieldPrev$due2trend$YEAR == (yieldPrev$currentYear -1) )] }
	cat(c(" \n \n \n RESPONSE \n \n ","As it is, the forecasted yield for year",yieldPrev$currentYear,"is",round(expYield$fit[1],2),"+/-",round(expYield$fit[1]-expYield$fit[2],2),"."),fill=TRUE)
	cat(c("Confidence = 95% \n	\n CROSS-VALIDATION \n ",round(yieldPrev$CVmsRes[1],2),"as mean square error and ",round(yieldPrev$CVmsRes[2],2),"as R2."))

	try(scenarioAnalysis(),silent=T)

if(any(names(yieldPrev) == "due2trend")){cat(c("\n \n Due to the marked trends, the forecasted has to be corrected with ",round(trendMissing,2)," resulting, so, as ",round(expYield$fit[1]+trendMissing,2),". \n
"),fill=TRUE)}
try(bartolomeoMod(depthing=F),silent=TRUE)
try(supVecMac(),silent=T)
cat(c("	\n
	TimeSeries statistical analysis over OFFICIAL_YIELD would bet on ",round(forecast(ets(yieldPrev$actualYield[,2]),h=1)$mean[1],2)," +/- ",round((forecast(ets(yieldPrev$actualYield[,2]),h=1)$upper[2]-forecast(ets(yieldPrev$actualYield[,2]),h=1)$mean[1]),2),"."))


	#last years everage
	lastResults<-yieldPrev$actualYield[which(yieldPrev$actualYield$YEAR>=yieldPrev$currentYear-5),which(names(yieldPrev$actualYield)=="OFFICIAL_YIELD")]
	cat(c("\n	In the last ",length(lastResults)," (",yieldPrev$currentYear-1,"-", yieldPrev$currentYear-5, ") the average yield was ",round(mean(lastResults),2)," +/- ",round(var(lastResults),2),"."))

	#crossVal plot
	#acquire loocv single data
	motoCross<-cv.lm(data=yieldPrev$tableXregression, formula(yieldPrev$modelLM), m=length(yieldPrev$tableXregression[,1]),printit=FALSE)
	#keep out Year and cvpred
	crValPre<-data.frame(YEAR=motoCross$YEAR,pred=motoCross$cvpred)
	respoPlot<- ggplot(crValPre)
	cat(c(" \n Plotted:\n GREEN: Predicted yield in Cross Validation \n RED: Data on which models are regressed"),fill=TRUE)
	if(any(names(yieldPrev) == "due2trend")){
		untrend<-yieldPrev$flatYield
		yieldPrev$omniYield<-merge(crValPre,yieldPrev$due2trend,by="YEAR")
		yieldPrev$omniYield<-rbind(yieldPrev$omniYield,c(yieldPrev$currentYear,expYield$fit[1],trendMissing))
		yieldPrev$omniYield$YIELD<-yieldPrev$omniYield$pred+yieldPrev$omniYield$trended

		respoPlot <- respoPlot+geom_line(aes(x=YEAR,y=OFFICIAL_YIELD),color="black",size=1.5,data=yieldPrev$actualYield)+geom_line(aes(x=YEAR,y=YIELD),color="blue",data=yieldPrev$omniYield)
		cat(c(" BLUE: Predicted yield, restored trend \n BLACK: Real data \n "),fill=TRUE)
	}
	crValPre<-rbind(crValPre,c(yieldPrev$currentYear,expYield$fit[1]))
	respoPlot<-respoPlot+geom_line(aes(x=YEAR,y=OFFICIAL_YIELD,group=1),color="red",size=1.5,data=yieldPrev$flatYield)+geom_line(aes(x=YEAR,y=pred,group=1),color="green",data=crValPre)
	plot(respoPlot)
	#dev.new()
	#plot(yieldPrev$PCmodel,line=1)
	if(any(names(yieldPrev)=="breakPoint")){try(valiTrend(),silent=TRUE)}
}

saveYieldSession<- function(){
	attach(yieldPrev)
	save(list=ls(yieldPrev),file=paste(Sys.Date(),".RData",sep=""))
	cat(c("Session saved in ",paste(Sys.Date(),saveCountry,saveCrop,".RData",sep=""),". \n Use loadYieldSession() to restore in future sessions."),fill=TRUE)
}
loadYieldSession<-function(){
	cat(c("Select the desired previous session saved \n "),fill=TRUE)
	oldYieldSession<-file.choose()
	load(file=oldYieldSession,envir=yieldPrev)
	cat(c("Session in oldYieldSession loaded \n "),fill=TRUE)
}

valiTrend<-function(){
	#obtaind clean informations for where the trend doesn't exist
	train<-subset(yieldPrev$due2trend,yieldPrev$due2trend$trended == 0)
	train<-merge(train,yieldPrev$actualYield, by="YEAR")
	train$trended<-NULL
	train<-merge(train,yieldPrev$relatedModel, by="YEAR")
	#getting a model only based on untrend data
	noTrendMod<-lm(as.formula(yieldPrev$model_formula),data=train)
	#test set
	test<-subset(yieldPrev$due2trend,yieldPrev$due2trend$trended != 0)
	test<-merge(test,yieldPrev$relatedModel,by="YEAR")
	test$trended<-NULL
	solution<-subset(yieldPrev$due2trend,yieldPrev$due2trend$trended != 0)
	solution<-merge(solution,yieldPrev$flatYield,by="YEAR")
	solution$trended<-NULL

	woTrend<-predict(noTrendMod,newdata=test,se.fit=TRUE,type="response",level=0.95,interval="prediction")

	predCfg<-merge(solution,yieldPrev$omniYield,by="YEAR")
	predCfg$clean<-woTrend$fit[,1]
	DnoTREND<-predCfg$YIELD-predCfg$pred
	DwTREND<-predCfg$OFFICIAL_YIELD-predCfg$clean

	sigNO<-sqrt(mean((DnoTREND - mean(DnoTREND))^2))
	sigW<-sqrt(mean((DwTREND - mean(DwTREND))^2))
	if(sigNO < sigW){cat(c("NOTE that the pointed Trends are afflicted by some kind of problem, a BETTER FITting model can be obtained WITHout any TREND removal. \n"))}
	asimErr<-skewness(woTrend$fit[,1]-solution$OFFICIAL_YIELD)
	predError<-woTrend$fit[,1]-solution$OFFICIAL_YIELD
	sigma<-sqrt(mean((predError - mean(predError))^2))
	danger<-asimErr/sigma
	if(danger >= 2.6){cat(c("\n ADVICE: \n The marked trend related dynamics don't fit with the data! \n "))}
	if(sigNO < sigW|danger >= 2.6){cat(c("Do you want to reset the trend marked and proceed again? (y/n) \n"),fill=T)
		reBea<-scan(,what="text",nmax=1)
		while(!any(c(length(reBea)==0,reBea != "y" , reBea != "n"))){
			cat("answer y or n")
			reBea<-scan(,what="text",nmax=1)
		}
		if(reBea == "y"){
			#reset trend's stuff
			rm(list=c("breakPoint","flatYield","due2trend","friendShip","flattyn","safeTrend","yieldTrend","flatOff","tableXregression","model_formula","CVmsRes","expYield","omniYield","modelLM","PCmodel"),envir=yieldPrev)
			#then, start again
			virgilio()}
	}
}

#bayesian variance scomposition
bartolomeoMod<-function(cpusa=1,depthing=T){
	suppressMessages(library(bartMachine))
	if(cpusa>1){
		set_bart_machine_num_cores(cpusa)
	}
	anno<-yieldPrev$currentYear
	paese<-yieldPrev$saveCountry
	coltura<-yieldPrev$saveCrop
	if(depthing){
		BARmodel<-bartMachineCV(yieldPrev$tableXregression[,-c(1,2)],yieldPrev$tableXregression[,2])
	}else{
		BARmodel<-bartMachine(yieldPrev$tableXregression[,-c(1,2)],yieldPrev$tableXregression[,2],verbose=F)
	}
	correnti<-names(subset(yieldPrev$relatedModel, yieldPrev$relatedModel$YEAR  == yieldPrev$currentYear))
	predetti<-names(yieldPrev$tableXregression[,-c(1,2)])
	buono<-subset(yieldPrev$relatedModel, yieldPrev$relatedModel$YEAR  == yieldPrev$currentYear)[,unlist(lapply(X=correnti,FUN=function(nome){if(any(nome==predetti)){return(which(correnti==nome))}}))]
	BARpred<-calc_credible_intervals(BARmodel,buono)
	predCR<-mean(BARpred)
	errCR<-predCR-BARpred[1]

	#cat(c("=========================================================="),fill=T)
	#cat(c("=========================================================="),fill=T)
	#cat(c("Bayesian Additive Regression Tree (BART) running on provided data for crop ", coltura," in ",paese," esteems the yield for ",anno, " at ",round(predCR,2),"+/-",round(errCR,2),".\n"))
	cat(c("\nBayesian Additive Regression Tree (BART) running on provided data for crop ", coltura," in ",paese," esteems the yield for ",anno, " at ",round(predCR,2),"+/-",round(errCR,2),".\n"))
	#cat(c("=========================================================="),fill=T)
	#cat(c("=========================================================="),fill=T)
}

#splitted to get stabler the work
scenarioAnalysis<-function(){
	pcr_model<-pcr(OFFICIAL_YIELD ~ . ,data=yieldPrev$tableXregression,scale=TRUE,validation="LOO",ncomp=4)
	pcr4<-predict(pcr_model,newdata=subset(yieldPrev$relatedModel, yieldPrev$relatedModel$YEAR  == yieldPrev$currentYear),ncomp=4)
	pcr3<-predict(pcr_model,newdata=subset(yieldPrev$relatedModel, yieldPrev$relatedModel$YEAR  == yieldPrev$currentYear),ncomp=3)
	yieldPrev$PCmodel<-pcr_model
	cat(c("\n \n Principal Component Regression (PCR) predicted \n ",round(pcr4,2),"+/-",round(RMSEP(yieldPrev$PCmodel)$val[8],2)," using 4 components \n ",round(pcr3,2),"+/-",round(RMSEP(yieldPrev$PCmodel)$val[6],2)," using 3 components."))
}

#Support Vector Machine (instead of lasso and/or elastic-net)
supVecMac<-function(){
	vectorMachineModel<-svm(as.matrix(yieldPrev$tableXregression[,-c(which(names(yieldPrev$tableXregression)=="YEAR"),which(names(yieldPrev$tableXregression)=="OFFICIAL_YIELD"))]),yieldPrev$tableXregression[,which(names(yieldPrev$tableXregression)=="OFFICIAL_YIELD")])
	svmYield<-predict(vectorMachineModel,subset(yieldPrev$relatedModel[,-which(names(yieldPrev$tableXregression)=="YEAR")], yieldPrev$relatedModel$YEAR  == yieldPrev$currentYear))
	cat(c("\n Support Vector Machine predicted ",round(svmYield,2),"."))
}
FoscoV/foreYield documentation built on June 20, 2021, 7:17 p.m.