Load libraries
library(MASS) # For ginv function # library(FactoMineR) library(signal) # library(plyr) library(caret) # library(dplyr) library(prospectr) library(rnirs) # library(ggplot2) # library(plotly) library(nirsextra) source("Script_R_2020/sp2dfclo.R") source('Script_R_2020/SIGNE_maha0.R')
Load globalmatix
# brb3="/home/ecarnot/Documents/INRA/Projets/SIGNE/2019/globalmatrix" brb3="globalmatrix" load(file=brb3) # sp=globalmatrix
Set parameters
### FIXATION DES PARAMETRES UTILISES: ## Nombre de repetitions de la boucle de PLSDA: repet= 2 ## Parametres du Savitsky-Golay (p=degre du polynome, n= taille de la fenetre, m=ordre de derivation) p=3 #2 n=11#11 #Faire avec un n plus gros ? m=1 #1 ## Nombre de VL max autorisees ncmax=30 #35 # Enrichissement enrich=0 # Seuil Rapdista srap=1.5
Pretreatments
# p=rbind(list('adj',''),list('red',c(1000,1,1)),list('snv',''),list('sder',c(2,3,w)),list('red',c(w/2,w/2,1))) pr=rbind(list('adj',c(551,1352)),list('adj',c(552,1352)),list('snv',''),list('sder',c(m,p,n)),list('red',c(n/2,n/2,1))) # pr=rbind(list('adj',c(551,1352)),list('adj',c(552,1352)),list('snv',''),list('sder',c(m,p,n))) sp_pre=pre(globalmatrix,pr)
Preparation du jeu initial
cepage="G" sp=sp_pre cep=as.factor(substr(rownames(sp),9,9)) clo=as.factor(substr(rownames(sp),9,13)) sp=sp2dfclo(sp,cep,clo) sp$annee=as.factor(substr(rownames(sp),1,4)) sp$parcelle=as.factor(substr(rownames(sp),18,18)) sp$date=as.factor(substr(rownames(sp),1,8)) ## On enleve 20190628, jour a + de 43°C ? sp=sp[sp$date != "20190628" & sp$parcelle=="G" & sp$cep==cepage,] # sp=sp[sp$parcelle=="G",] # sp=sp[sp$parcelle=="G" & sp$date != "20190730",] sp=droplevels(sp)
annees=c("2017","2018","2019") dat=data.frame() an=NULL typ=NULL LV=NULL for (i in 1:3) { if (enrich>0) { Fdatval=sort(unique(sp$date[sp$annee==annees[i]]))[1:enrich] idcal= which(sp$annee !=annees[i] | is.element(sp$date,Fdatval)) # sp$date==Fdatval } else { idcal= which(sp$annee !=annees[i]) } spcal= droplevels(sp[idcal,]) spval=droplevels(sp[-idcal,]) predmF=as.data.frame(matrix(nrow = nrow(spval), ncol = ncmax)) distances=as.data.frame(matrix(nrow = nrow(spval), ncol = ncmax)) rapdista=as.data.frame(matrix(nrow = nrow(spval), ncol = ncmax)) perokm =as.data.frame(matrix(nrow = 1, ncol = ncmax)) lost_rap=as.data.frame(matrix(nrow = 1, ncol = ncmax)) rplsda=caret::plsda(spcal$x, spcal$clo,ncomp=ncmax) sccal=rplsda$scores spval_c=scale(spval$x,center=rplsda$Xmeans,scale = F) scval=spval_c%*%rplsda$projection # score_val=predict(rplsda,sc_val,type="scores") : ne marche pas for (ii in 2:ncmax) { predmF[,ii]=SIGNE_maha0(sccal[,1:ii], spcal$clo, scval[,1:ii])$class distances[,ii]=SIGNE_maha0(sccal[,1:ii], spcal$clo, scval[,1:ii])$dist dist_sort=t(apply(distances[,ii],1,"sort"))[,1:2] rapdista[,ii]=dist_sort[,2]/dist_sort[,1] iok=rapdista[,ii]>srap ts=table(predmF[iok,ii],spval$clo[iok]) perokm[ii]=100*sum(diag(ts))/sum(iok) lost_rap[ii]=100*sum(iok)/nrow(spval) } tsm=lapply(as.list(predmF), spval$clo, FUN = table) diagsm=lapply(tsm, FUN = diag) peroktt =100*unlist(lapply(diagsm, FUN = sum))/nrow(spval) dat=c(dat,peroktt[-1]) # plot(perokm, xlab= "Nombre de VL", ylab = "Pourcentage de biens class?s",pch=19, cex=1.5) # print(c(max(perokm),which.max(perokm))) # cat("\n\nannée prédite = ", annees[i]," cepage = ", cepage,"\n") # cat("Pourcentage bien classé / Tous échentillons\n") # print(peroktt) # cat("\nPourcentage bien classé / sans indiv. proches\n") # print(as.matrix(perokm)) dat=c(dat,perokm[-1]) an=c(an,rep(annees[i],ncmax-1),rep(annees[i],ncmax-1)) typ=c(typ,rep("All smpls",ncmax-1),rep("Unambig. smpls",ncmax-1)) LV=c(LV,rep(1:(ncmax-1),2)) } df=data.frame(LV,per=unlist(dat),an,typ) ggplot(df, aes(x = LV, y = per)) + geom_line(aes(color = an, linetype = typ)) # # ggplot(data=df, mapping=aes(x=LV, y=per)) + # geom_line(mapping=aes(colour=an)) + # geom_hline(yintercept=c(-1,1)*qnorm(0.95), color="orange") + # geom_hline(yintercept=c(-1,1)*qnorm(0.99), color="darkred")
Essai sur la manière dont se repartissent les points selon rapdist
library(ggplot2) rt=1/c(1.0001,1.2,1.4,1.5,2) A=c(0,0) B=c(1,1) dfAB=data.frame(rbind(A,B)) dfAB=cbind(dfAB,rownames(dfAB)) colnames(dfAB)=c("x","y","rrep") df=data.frame(x=double(),y=double(),rrep=character()) for (j in 1:length(rt)) { r=rt[j] xt=seq(-0.5,1,0.005) y1=as.numeric() y2=as.numeric() rrep=character() for (i in 1:length(xt)){ x=xt[i] a=1-r^2 b=-2*A[2]+2*r^2*B[2] c=x^2*(1-r^2)+x*(-2*A[1]+2*r^2*B[1])+A[1]^2+A[2]^2-r^2*B[1]-r^2*B[2] y1=c(y1,(-b-sqrt(b^2-4*a*c))/(2*a)) y2=c(y2,(-b+sqrt(b^2-4*a*c))/(2*a)) rrep=c(rrep,1/r) } x=c(xt,xt) y=c(y1,y2) rrep=c(rrep,rrep) df=rbind(df,data.frame(x=x,y=y,rrep=substr(rrep,1,3))) # df=rbind(df,data.frame(rrep=substr(rrep,1,3),xt,y1,y2)) # # points(B[1],B[2],col='red') # lines(xt,y1) # lines(xt,y2) } df=df[df$y>-1 | !is.na(df$x),] df=rbind(df,dfAB) # gg = ggplot(df,aes(x=x,y=y, group=rrep)) + geom_point() +geom_line(aes(color=rrep)) + xlim(A[1],B[1]) + ylim(A[2],B[2]) + geom_point(aes(x=A[1], y=A[2])) + geom_point(aes(x=B[1], y=B[2])) n=nrow(df) gg = ggplot(df,aes(x=x,y=y)) +geom_point(data=df[-c(n-1,n),],aes(color=rrep)) + xlim(A[1],B[1]) + ylim(A[2],B[2]) + labs(color="Distance ratio") + geom_point(data=df[c(n-1,n),],aes(x=x,y=y))+ geom_text(data=df[c(n-1,n),],aes(x=x,y=y,label=rrep),hjust=-0.5, vjust=-0.5) gg
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.