In martinEcarnot/signe: Useful functions to handle spectra

Load libraries

library(MASS) # For ginv function
# library(FactoMineR)
library(signal)
# library(plyr)
library(caret)
# library(dplyr)
library(prospectr)
library(rnirs)
# library(ggplot2)
# library(plotly)
library(nirsextra)

source("Script_R_2020/sp2dfclo.R")
source('Script_R_2020/SIGNE_maha0.R')

Load globalmatix

# brb3="/home/ecarnot/Documents/INRA/Projets/SIGNE/2019/globalmatrix"
brb3="globalmatrix"
load(file=brb3)
# sp=globalmatrix

Set parameters

### FIXATION DES PARAMETRES UTILISES:
## Nombre de repetitions de la boucle de PLSDA:
repet= 2
## Parametres du Savitsky-Golay (p=degre du polynome, n= taille de la fenetre, m=ordre de derivation)
p=3 #2
n=11#11 #Faire avec un n plus gros ?
m=1 #1
## Nombre de VL max autorisees
ncmax=30 #35
# Enrichissement
enrich=0
# Seuil Rapdista
srap=1.5

Pretreatments

# p=rbind(list('adj',''),list('red',c(1000,1,1)),list('snv',''),list('sder',c(2,3,w)),list('red',c(w/2,w/2,1)))
pr=rbind(list('adj',c(551,1352)),list('adj',c(552,1352)),list('snv',''),list('sder',c(m,p,n)),list('red',c(n/2,n/2,1)))
# pr=rbind(list('adj',c(551,1352)),list('adj',c(552,1352)),list('snv',''),list('sder',c(m,p,n)))
sp_pre=pre(globalmatrix,pr)

Preparation du jeu initial

cepage="G"
sp=sp_pre
cep=as.factor(substr(rownames(sp),9,9))
clo=as.factor(substr(rownames(sp),9,13))
sp=sp2dfclo(sp,cep,clo)
sp$annee=as.factor(substr(rownames(sp),1,4))
sp$parcelle=as.factor(substr(rownames(sp),18,18))
sp$date=as.factor(substr(rownames(sp),1,8))
## On enleve 20190628, jour a + de 43°C ?
sp=sp[sp$date != "20190628" & sp$parcelle=="G" & sp$cep==cepage,]
# sp=sp[sp$parcelle=="G",]
# sp=sp[sp$parcelle=="G" & sp$date != "20190730",]
sp=droplevels(sp)

annees=c("2017","2018","2019")
dat=data.frame()
an=NULL
typ=NULL
LV=NULL
for (i in 1:3) {
  if (enrich>0) {
    Fdatval=sort(unique(sp$date[sp$annee==annees[i]]))[1:enrich]
    idcal= which(sp$annee !=annees[i] | is.element(sp$date,Fdatval))  # sp$date==Fdatval
  } else {
      idcal= which(sp$annee !=annees[i])
  }
spcal= droplevels(sp[idcal,])
spval=droplevels(sp[-idcal,])
predmF=as.data.frame(matrix(nrow = nrow(spval), ncol = ncmax))
distances=as.data.frame(matrix(nrow = nrow(spval), ncol = ncmax))
rapdista=as.data.frame(matrix(nrow = nrow(spval), ncol = ncmax))
perokm =as.data.frame(matrix(nrow = 1, ncol = ncmax))
lost_rap=as.data.frame(matrix(nrow = 1, ncol = ncmax))

rplsda=caret::plsda(spcal$x, spcal$clo,ncomp=ncmax)
sccal=rplsda$scores
spval_c=scale(spval$x,center=rplsda$Xmeans,scale = F)
scval=spval_c%*%rplsda$projection  # score_val=predict(rplsda,sc_val,type="scores") : ne marche pas

for (ii in 2:ncmax) {
  predmF[,ii]=SIGNE_maha0(sccal[,1:ii], spcal$clo, scval[,1:ii])$class
  distances[,ii]=SIGNE_maha0(sccal[,1:ii], spcal$clo, scval[,1:ii])$dist
  dist_sort=t(apply(distances[,ii],1,"sort"))[,1:2]
  rapdista[,ii]=dist_sort[,2]/dist_sort[,1]
  iok=rapdista[,ii]>srap
  ts=table(predmF[iok,ii],spval$clo[iok])
  perokm[ii]=100*sum(diag(ts))/sum(iok)
  lost_rap[ii]=100*sum(iok)/nrow(spval)
}

tsm=lapply(as.list(predmF), spval$clo, FUN = table)
diagsm=lapply(tsm, FUN = diag)
peroktt =100*unlist(lapply(diagsm, FUN = sum))/nrow(spval)

dat=c(dat,peroktt[-1])

# plot(perokm, xlab= "Nombre de VL", ylab = "Pourcentage de biens class?s",pch=19, cex=1.5)
# print(c(max(perokm),which.max(perokm)))

# cat("\n\nannée prédite = ", annees[i],"      cepage = ", cepage,"\n")
# cat("Pourcentage bien classé / Tous échentillons\n")
# print(peroktt)
# cat("\nPourcentage bien classé / sans indiv. proches\n")
# print(as.matrix(perokm))

dat=c(dat,perokm[-1])

an=c(an,rep(annees[i],ncmax-1),rep(annees[i],ncmax-1))
typ=c(typ,rep("All smpls",ncmax-1),rep("Unambig. smpls",ncmax-1))
LV=c(LV,rep(1:(ncmax-1),2))
}

df=data.frame(LV,per=unlist(dat),an,typ)

ggplot(df, aes(x = LV, y = per)) +
  geom_line(aes(color = an, linetype = typ)) 
# 
# ggplot(data=df, mapping=aes(x=LV, y=per)) +
#   geom_line(mapping=aes(colour=an)) +
#   geom_hline(yintercept=c(-1,1)*qnorm(0.95), color="orange") +
#   geom_hline(yintercept=c(-1,1)*qnorm(0.99), color="darkred")

Essai sur la manière dont se repartissent les points selon rapdist

library(ggplot2)
rt=1/c(1.0001,1.2,1.4,1.5,2)


A=c(0,0)
B=c(1,1)
dfAB=data.frame(rbind(A,B))
dfAB=cbind(dfAB,rownames(dfAB))
colnames(dfAB)=c("x","y","rrep")
df=data.frame(x=double(),y=double(),rrep=character())

for (j in 1:length(rt)) {

  r=rt[j]
  xt=seq(-0.5,1,0.005)
  y1=as.numeric()
  y2=as.numeric()
  rrep=character()

  for (i in 1:length(xt)){
    x=xt[i]
    a=1-r^2
    b=-2*A[2]+2*r^2*B[2]
    c=x^2*(1-r^2)+x*(-2*A[1]+2*r^2*B[1])+A[1]^2+A[2]^2-r^2*B[1]-r^2*B[2]
    y1=c(y1,(-b-sqrt(b^2-4*a*c))/(2*a))
    y2=c(y2,(-b+sqrt(b^2-4*a*c))/(2*a))
    rrep=c(rrep,1/r)
  }
  x=c(xt,xt)
  y=c(y1,y2)
  rrep=c(rrep,rrep)
  df=rbind(df,data.frame(x=x,y=y,rrep=substr(rrep,1,3)))
  # df=rbind(df,data.frame(rrep=substr(rrep,1,3),xt,y1,y2))

# 

  # points(B[1],B[2],col='red')
  # lines(xt,y1)
  # lines(xt,y2)
}
df=df[df$y>-1 | !is.na(df$x),]
df=rbind(df,dfAB)

# gg = ggplot(df,aes(x=x,y=y, group=rrep)) + geom_point() +geom_line(aes(color=rrep)) + xlim(A[1],B[1]) + ylim(A[2],B[2]) + geom_point(aes(x=A[1], y=A[2])) + geom_point(aes(x=B[1], y=B[2]))

n=nrow(df)
gg = ggplot(df,aes(x=x,y=y)) +geom_point(data=df[-c(n-1,n),],aes(color=rrep)) + xlim(A[1],B[1]) + ylim(A[2],B[2]) + labs(color="Distance ratio") + geom_point(data=df[c(n-1,n),],aes(x=x,y=y))+ geom_text(data=df[c(n-1,n),],aes(x=x,y=y,label=rrep),hjust=-0.5, vjust=-0.5)
gg