#' ################################################################################################
#' #' Features analysis
#' #' Produces Figure 2, Figure S1 E
#' #'
#' #'Analysis the relationship between images features and aesthetic scores
#' #'
#' #' @author Nicolas Mouquet, \email{nicolas.mouquet@@cnrs.fr},
#' #' Juliette Langlois, \email{juliette.a.langlois@@gmail.com},
#' #' François Guilhaumon, \email{francois.guilhaumon@@ird.fr}
#' #' Nicolas Casajus, \email{nicolas.casajus@@fondationbiodiversite.fr}
#' #'
#' #' @date 2021/05/17 first created
#' ################################################################################################
rm(list=ls(all=TRUE))
pathresults <- here::here("results","features")
####Load and combine the data----
cluster <- read.csv(here::here("results","features", "cluster.csv"))
cluster <- cluster[,-1]
rownames(cluster) <- cluster$name
lumsat <- read.csv(here::here("results","features", "lumsat.csv"))
lumsat <- lumsat[,-1]
rownames(lumsat) <- lumsat$name
momocs <- read.csv(here::here("results","features", "momocs.csv"))
momocs <- momocs[,-1]
rownames(momocs) <- momocs$name
esthe_focus_images <- read.csv(here::here("results","deep", "06_esthe_focus.csv"))
rownames(esthe_focus_images) <- esthe_focus_images$name_worms
tmp12 <- merge(cluster, lumsat, by=0, all=T)
rownames(tmp12) <- tmp12$Row.names; tmp12$Row.names <- NULL; tmp12$name.x <- NULL; tmp12$name.y <- NULL
tmp123 <- merge(tmp12, momocs, by=0, all=T)
rownames(tmp123) <- tmp123$Row.names;tmp123$Row.names <- NULL;tmp123$name <- NULL
datall <- merge(tmp123, esthe_focus_images, by=0, all=T)
rownames(datall) <- datall$Row.names; datall$Row.names <- NULL;datall$image <- NULL
#create a new colum to differenciat between the species who have a direct
#evaluation of aesthetics from others
datall$campg <- "ALL"
datall$campg[(datall$mayo_campgn==1)]="EVAL"
datall$campg[(datall$fisheyes_campgn==1)]="EVAL"
rm(tmp12)
rm(tmp123)
rm(cluster)
rm(lumsat)
rm(momocs)
rm(esthe_focus_images)
all_id <- c("CL_cie_d_mean","CL_cie_d_sd","CL_hullarea","SH_n.core.cell_mean","SH_n.core.cell_sd",
"SH_perimeter_mean","SH_perimeter_sd","SH_perim.area.ratio_mean","SH_perim.area.ratio_sd",
"SH_core.area.index_mean","SH_core.area.index_sd","LS_mean_satu","LS_sd_satu","LS_mean_light",
"LS_sd_light","MO_Fourier_pc1","MO_Fourier_pc2")
#end----
####Multiple linear regression FIGURE 2----
varcor="esthe_pred"
#subset the data for which we have esthe evaluation by humans
datacor <- datall[datall$focus==1,]
#Look at the correlation between variables and remove the variable with pearson > 0.7
cormat <- function(idcor,data,thr){
#idcor=all_id
#data=data_fisheyes
#thr=0.7
remove <- function(id_name,rem) {
for (i in 1:length(rem)) id_name=id_name[id_name!=rem[i]]
return(id_name)
}
id_temp <- idcor
for (i in 1:length(id_temp)) {
if (length(id_temp)==1) break
cor_mat <- cor(data[,id_temp])
if (i>= dim(cor_mat)[1]) break
rem <- names(which(abs(cor_mat[,i])>thr)[-1])
if (length(rem)>0) id_temp <- remove(id_name=id_temp,rem=rem)
}
return(id_temp)
}
id_tps <- cormat(idcor=all_id,data=datacor,thr=0.7)
#Order the remaining variables in decreasing order of importance
#varcor : variable to explain
#all_id : vector of variable to use in the correlation
#dat_cor: dataset to be used
#ord : stat to be used to sort the variable (R2, PERVAR percentage of total variance,PEARSON coefficient)
look_cor <- function(all_id,dat_cor=datacor,varcor,ord){
#all_id=all_id
#dat_cor=data_fisheyes
#varcor="esthe_fisheyes_all"
#ord="R2"
modall <- lm(as.formula(paste(varcor," ~ ", paste(all_id, collapse= "+"))),data=dat_cor, na.action=na.omit)
out_data <- as.data.frame(matrix(NA,ncol=4,nrow=length(all_id)))
colnames(out_data) <- c("var","pervar","R2","pearson.p")
out_data$var <- all_id
for (i in 1:length(all_id))
{
cort <- cor.test(dat_cor[,varcor],dat_cor[,all_id[i]])
out_data$pearson.p[i] <- cort$p.value
modminus <- lm(as.formula(paste(varcor," ~ ", paste(all_id[-i], collapse= "+"))),data=dat_cor, na.action=na.omit)
modalone <- lm(as.formula(paste(varcor," ~ ", all_id[i])),data=dat_cor, na.action=na.omit)
out_data$pervar[i] <- (100-(summary(modminus)[[8]]/summary(modall)[[8]])*100)
out_data$R2[i] <- summary(modalone)[[8]]
}
if (ord=="R2") corres <- out_data[order(-out_data$R2),]
if (ord=="PERVAR") corres <- out_data[order(-out_data$pervar),]
if (ord=="PEARSON") corres <- out_data[order(-out_data$pearson.p),]
return(corres)
}
ord <- look_cor(id_tps,dat_cor=datacor,varcor,ord="PERVAR")
id_ord <- ord$var
#build the full model
modall <- lm(as.formula(paste(varcor," ~ ", paste(id_ord, collapse= "+"))),data=datacor)
summary(modall)
#Keep the variables that are significant
id_final <- c("CL_cie_d_mean","LS_sd_light","LS_mean_satu",
"MO_Fourier_pc1","MO_Fourier_pc2","LS_mean_light","SH_perimeter_mean","SH_core.area.index_sd",
"SH_perim.area.ratio_sd","SH_n.core.cell_mean")
modfinal <- lm(as.formula(paste(varcor," ~ ", paste(id_final, collapse= "+"))),data=datacor)
summary(modfinal)
#remove SH_n.core.cell_mean which is not significant anymore
id_final <- c("CL_cie_d_mean","LS_sd_light","LS_mean_satu",
"MO_Fourier_pc1","MO_Fourier_pc2","LS_mean_light","SH_perimeter_mean","SH_core.area.index_sd",
"SH_perim.area.ratio_sd")
modfinal <- lm(as.formula(paste(varcor," ~ ", paste(id_final, collapse= "+"))),data=datacor)
summary(modfinal)
#Scale the variables in the final model & compare the Estimates (FIGURE 2a 600x900)
data_scale <- cbind.data.frame(esthe_pred=datacor$esthe_pred, scale(datacor[,id_final]))
modfinal <- lm(as.formula(paste(varcor," ~ ", paste(id_final, collapse= "+"))),data=data_scale)
sum_mod <- summary(modfinal)
x <- rownames(sum_mod$coefficients)
x <-x[-1]
Estimate <- sum_mod$coefficients[-1,"Estimate"]
sdev <- sum_mod$coefficients[-1,"Std. Error"]
data_plot <- cbind.data.frame(name=x,Estimate=Estimate,sdev=sdev)
data_plot <- data_plot[order(data_plot$Estimate,decreasing = FALSE),]
data_plot$pos <- NA
for (i in 1:length(data_plot$pos)) {if (data_plot$Estimate[i]<0) data_plot$pos[i]=10 else data_plot$pos[i]=-35}
#FIGURE 2a
##Change the names of the variables (see Text F in S1 File)
data_plot$name <- c("Elongatedness","Pattern variation","Pattern asymmetry","Mean lightness","Morpho_2","Pattern repetition","SD lightness","Color saturation","Color heterogeneity")
##Draw the figure
library(ggplot2)
theme_set(theme_bw())
plot <- ggplot(data_plot) +
geom_bar( aes(x=name, y=Estimate), stat="identity", fill=c("tomato1","tomato1","seagreen3","seagreen3","seagreen3","seagreen3","seagreen3","seagreen3","seagreen3"), alpha=0.8) +
geom_errorbar( aes(x=name, ymin=Estimate-sdev, ymax=Estimate+sdev), width=0.3, colour="azure4", alpha=0.9, size=1) +
scale_x_discrete(limits=data_plot$name)+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.y = element_blank(),
axis.text=element_text(size=10, family = "serif"),
axis.title=element_text(size=18, family = "serif")
) +
xlab("Images Features") +
ylab("Estimates (scaled)") +
coord_flip() +
geom_text(aes(x=name, y=pos,label = name),hjust = 0,size=5, family = "serif")
ggplot2::ggsave(filename = here::here("figures_tables", "FIGURE_2a.jpg"),
plot, width = 14, height = 18, units = "cm", dpi = 600)
#PCA ANALYSIS FIGURE 2b
data_scale <- cbind.data.frame(esthe_pred=datacor$esthe_pred, scale(datacor[,id_final]))
##Change the names of the variables (see Text F in S1 File)
colnames(data_scale) <- c("esthe_pred","Color heterogeneity","SD lightness","Color saturation","Elongatedness",
"Morpho_2","Mean lightness","Pattern repetition","Pattern asymmetry","Pattern variation")
pca_fish<-ade4::dudi.pca(data_scale[,-1], scannf=FALSE, nf = 5)
factoextra::fviz_eig(pca_fish,main = "Eigenvalues")
##with ggplot (old version )
# library(ggplot2)
# factoextra::fviz_pca_biplot(pca_fish,
# col.ind = data_scale$esthe_pred,
# geom = "point",
# gradient.cols = c("blue", "green","yellow","orange", "red" ),
# repel = TRUE,
# col.var = "darkblue",
# geom.var = c("arrow", "text"),
# alpha.ind = 0.9,
# ggtheme = theme_minimal(),
# ylim = c(-5,6),
# xlim = c(-7,5),
# legend.position = c(0.2, 0.8)) +
# labs(col="Aesthetic", title = " ") +
# theme(legend.position = c(0.95, 0.9))
## old fashion (thanks Nicolas_C !)
source(here::here("R", "shadow_text.R"))
n_bins <- 255
couleurs <- c('#a50026', '#d73027', '#f46d43', '#fdae61', '#fee090', '#e0f3f8',
'#abd9e9', '#74add1', '#4575b4', '#313695')
couleurs <- colorRampPalette(couleurs)(n_bins)
couleurs <- rev(couleurs)
couleurs <- paste0(couleurs, "dd")
z_range <- range(data_scale$"esthe_pred")
z_bins <- seq(z_range[1], z_range[2], length.out = n_bins + 1)
z_colors <- rep(NA, length(data_scale$"esthe_pred"))
for (i in 1:n_bins) {
if (i == n_bins) {
pos <- which(data_scale$"esthe_pred" >= z_bins[i] &
data_scale$"esthe_pred" <= z_bins[i + 1])
} else {
pos <- which(data_scale$"esthe_pred" >= z_bins[i] &
data_scale$"esthe_pred" < z_bins[i + 1])
}
if (length(pos)) {
z_colors[pos] <- couleurs[i]
}
}
inds <- pca_fish$"li"
eigens <- round(100 * pca_fish$eig / sum(pca_fish$eig) , 1)
eigens <- eigens[1:2]
ordre <- order(data_scale$"esthe_pred", decreasing = FALSE)
png(filename = here::here("figures_tables", "FIGURE_2b.png"), width = 12, height = 12, res = 600,
units = "in", pointsize = 16)
par(xaxs = "i", yaxs = "i", mar = c(2.5, 2.5, 1, 1), family = "serif", new = FALSE)
plot(0, type = "n", xlim = c(-9, 7), ylim = c(-7, 8), ann = FALSE, bty = "n",
axes = FALSE)
grid(lty = 1, lwd = 0.5, col = "#cccccc")
par(mgp = c(3, 0.25, 0))
axis(1, lwd = 0)
axis(2, lwd = 0, las = 1)
mtext(text = paste0("Dimension 1 (", eigens[1],"%)"), side = 1, line = 1.25,cex=1.8)
mtext(text = paste0("Dimension 2 (", eigens[2],"%)"), side = 2, line = 1.25,cex=1.8)
points(inds[ordre, 1:2], pch = 19, col = z_colors[ordre])
par(new = TRUE)
plot(0, type = "n", xlim = c(-1.29, 1), ylim = c(-0.87, 1), ann = FALSE, bty = "n",
axes = FALSE)
vars <- pca_fish$co
for (i in 1:nrow(vars)) {
shape::Arrows(0, 0, vars[i, 1], vars[i, 2], arr.type = "triangle", lwd = 3.50,
arr.width = 0.15, arr.length = 0.15, col = "white")
shape::Arrows(0, 0, vars[i, 1], vars[i, 2], arr.type = "triangle", lwd = 2.25,
arr.width = 0.15, arr.length = 0.15, col = "#111111")
}
abline(h = 0, v = 0, lwd = 1, lty = 1, col = "#333333")
box(col = "#cccccc", lwd = 1)
for (i in 1:nrow(vars)) {
pos <- 1
if (i %in% c(5,3,1,8,9)) {
pos <- 2
}
if (i %in% c(2,7)) {
pos <- 3
}
shadow_text(x = vars[i, 1], y = vars[i, 2], labels = rownames(vars)[i],
pos = pos, cex = 0.95, font = 2, col = "black", bg = "#FFFFFF33",
radius = 0.1)
}
par(new = TRUE)
y_bot <- 5.0
plot(0, type = "n", xlim = c(-9, 7), ylim = c(-7, 8), ann = FALSE, bty = "n",
axes = FALSE)
inc <- 0.01
for (i in 1:length(couleurs))
rect(5, y_bot + (i - 1) * inc, 5.5, y_bot + i * inc, col = couleurs[i],
border = couleurs[i])
llabels <- c(1250, 1500, 1750, 2000)
for (i in llabels) {
pos <- which(z_bins > i)[1]
text(5.5, y_bot + pos * inc, i, pos = 4, cex = .85)
lines(c(5.0, 5.1), rep(y_bot + pos * inc, 2), col = "white")
lines(c(5.4, 5.5), rep(y_bot + pos * inc, 2), col = "white")
}
shadow_text(5.25, y_bot - 0.75, "Aesthetic",
pos = 3, cex = 1.6, font = 1, col = "black", bg = "#FFFFFF33",
radius = 0.1)
dev.off()
## find coordinates of fish to illustrate the PCA figure
res.ind <- factoextra::get_pca_ind(pca_fish)
identifyPch <- function(x, y = NULL, n = length(x), plot = FALSE, pch = 19, ...)
{
xy <- xy.coords(x, y); x <- xy$x; y <- xy$y
sel <- rep(FALSE, length(x))
while(sum(sel) < n) {
ans <- identify(x[!sel], y[!sel], labels = which(!sel), n = 1, plot = plot, ...)
if(!length(ans)) break
ans <- which(!sel)[ans]
points(x[ans], y[ans], pch = pch)
sel[ans] <- TRUE
}
## return indices of selected points
which(sel)
}
temp <- read.csv(here::here("data","Data_S4.csv"),sep=";")
colnames(temp) <- c("name","url")
x <- res.ind$coord[,'Dim.1']
y <- res.ind$coord[,'Dim.2']
df <- data.frame(x=x, y=y,cop=datacor$copyright)
df$cop_col <- "grey"
df$cop_col[rownames(df) %in% temp$name[stringr::str_detect(temp$url, "reeflifesurvey")]]="red"
rownames(df) <- rownames(res.ind$coor)
df <- df[df$cop_col=="red",]
plot(y ~ x, data=df,ylim = c(-5,6),xlim = c(-7,5))
abline(v=0)
abline(h=0)
id_fish <- identifyPch(x=df$x,y=df$y)
rownames(df)[id_fish]
#end----
####PCA analysis FIGURE S1 E ----
pca_res <- prcomp(datall[,which(colnames(datall) %in% all_id)], center = TRUE,scale. = TRUE)
summary(pca_res)
pca_res.campg <- datall$campg
str(pca_res)
library(ggplot2)
library(ggbiplot)
plot <- ggbiplot::ggbiplot(pca_res,var.axes=FALSE,groups=pca_res.campg,choices=c(1,2),alpha =0.5,ellipse = TRUE,ellipse.prob=0.99)+scale_colour_manual(values = c("grey", "red"))+ theme_bw()
ggplot2::ggsave(filename = here::here("figures_tables", "FIGURE_E.jpg"),
plot = plot,
width = 16, height = 16, units = "cm", dpi = 600)
#end----
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.