# rm(list = ls())
# install.packages('cowplot')
#*******************************************************************************************************
# summary function
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=F,
conf.interval=.95, .drop=TRUE) {
library(plyr)
# New version of length which can handle NA's: if na.rm==T, don't count them
length2 <- function (x, na.rm=FALSE) {
if (na.rm) sum(!is.na(x))
else length(x)
}
# This does the summary. For each group's data frame, return a vector with
# N, mean, and sd
datac <- ddply(data, groupvars, .drop=.drop,
.fun = function(xx, col) {
c(N = length2(xx[[col]], na.rm=na.rm),
mean = mean (xx[[col]], na.rm=na.rm),
median = median (xx[[col]], na.rm=na.rm),
#do.call("rbind", tapply(xx[[col]], measurevar, quantile, c(0.25, 0.5, 0.75)))
sd = sd (xx[[col]], na.rm=na.rm)
)
},
measurevar
)
# Rename the "mean" column
datac <- rename(datac, c("mean" = measurevar))
datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean
# Confidence interval multiplier for standard error
# Calculate t-statistic for confidence interval:
# e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
ciMult <- qt(conf.interval/2 + .5, datac$N-1)
datac$ci <- datac$se * ciMult
return(datac)
}
#*******************************************************************************************************
# clean function 1
clean_data <- function (data) {
sdata <- data
sdata <- sdata[!is.na(sdata$Unsaturated_K2cm_cmhr), ]
sdata <- sdata[!is.na(sdata$Percent_Sand), ]
sdata <- sdata[!is.na(sdata$Type), ]
sdata <- sdata[,c(which(colnames(sdata)=="Percent_Clay"),which(colnames(sdata)=="Percent_Silt")
, which(colnames(sdata)=="Percent_Sand"), which(colnames(sdata)=="Texture_mod")
, which(colnames(sdata)=="Soil_Series_Type"), which(colnames(sdata)=="Unsaturated_K2cm_cmhr")
)]
sdata <- sdata[!is.na(sdata$Texture_mod),]
colnames(sdata) <- c("CLAY", "SILT", "SAND", "TEXTURE", "UF", "Unsaturated_K2cm_cmhr")
# handle NA data
sdata$sum <- sdata$CLAY + sdata$SILT + sdata$SAND
# convert sum to 100
sdata$ratio <- 100/sdata$sum
sdata$CLAY <- sdata$CLAY*sdata$ratio
sdata$SILT <- sdata$SILT*sdata$ratio
sdata$SAND <- sdata$SAND*sdata$ratio
sdata <- sdata[sdata$UF == "Urban Observed",]
sdata <- sdata[!is.na(sdata$Unsaturated_K2cm_cmhr),]
sdata <- sdata[!is.na(sdata$CLAY),]
print(paste0('-----------------------obs(n)=',nrow(sdata)))
sdata
}
#*******************************************************************************************************
# clean function 2
# length(sdata[which(sdata$Type == "fine granular structure"),]$Type)
update_structure <- function( data ) {
sdata <- data
sdata <- sdata[!is.na(sdata$Unsaturated_K2cm_cmhr), ]
sdata <- sdata[!is.na(sdata$Percent_Sand), ]
sdata <- sdata[!is.na(sdata$Type), ]
sdata$Top_Type <- as.character(sdata$Top_Type)
# sdata$Type <- as.character(sdata$Type)
for ( i in 1:nrow(sdata) ) {
if( sdata[i,]$Type == "fine granular structure" ) {
sdata[i,]$Top_Type <- "granular" }
else if ( sdata[i,]$Type == "single grain" ){
sdata[i,]$Top_Type <- "single grain"
}
else if ( sdata[i,]$Type == "medium granular structure" ){
sdata[i,]$Top_Type <- "granular"
}
else if (sdata[i,]$Type == "thin and medium plate-like structure"){
sdata[i,]$Top_Type <- "platy"
}
else if (sdata[i,]$Type == "massive"){
sdata[i,]$Top_Type <- "massive"
}
else if (sdata[i,]$Type == "medium subangular blocky"){
sdata[i,]$Top_Type <- "blocky"
}
else if (sdata[i,]$Type == "medium and fine granular"){
sdata[i,]$Top_Type <- "granular"
}
else if (sdata[i,]$Type == "coarse granular blocky" ){
sdata[i,]$Top_Type <- "blocky"}
else if (sdata[i,]$Type == "fine subangular blocky"){
sdata[i,]$Top_Type <- "blocky"
}
else if (sdata[i,]$Type == "fine and medium granular structure" ){
sdata[i,]$Top_Type <- "granular"
}
else if (sdata[i,]$Type == "medium platy structure" ){
sdata[i,]$Top_Type <- "platy"}
else if (sdata[i,]$Type == "fine and medium subangular blocky"){
sdata[i,]$Top_Type <- "blocky"
}
else if ( sdata[i,]$Type == "fine and medium prismatic structure" ){
sdata[i,]$Top_Type <- "prismatic"}
else if (sdata[i,]$Type == "medium granular and strong"){
sdata[i,]$Top_Type <- "granular"
}
else if (sdata[i,]$Type == "medium angular blocky"){
sdata[i,]$Top_Type <- "blocky" }
else if (sdata[i,]$Type == "fine angular structure"){
sdata[i,]$Top_Type <- "blocky"}
else if (sdata[i,]$Type == "medium prismatic parting to moderate medium subangular blocky"){
sdata[i,]$Top_Type <- "blocky"}
else if (sdata[i,]$Type == "medium prismatic structure parting to moderate medium subangular blocky"){
sdata[i,]$Top_Type <- "blocky"}
else if (sdata[i,]$Type == "coarse prismatic"){
sdata[i,]$Top_Type <- "prismatic"}
else if (sdata[i,]$Type == "medium prismatic"){
sdata[i,]$Top_Type <- "prismatic"}
else if (sdata[i,]$Type == "angular blocky"){
sdata[i,]$Top_Type <- "blocky"}
else if (sdata[i,]$Type == "very coarse prismatic structure"){
sdata[i,]$Top_Type <- "prismatic"}
else if (sdata[i,]$Type == "very fine granular structure"){
sdata[i,]$Top_Type <- "granular"}
else if (sdata[i,]$Type == "coarse subangular blocky"){
sdata[i,]$Top_Type <- "blocky"}
else if (sdata[i,]$Type == "fine subangular and angular blocky"){
sdata[i,]$Top_Type <- "blocky"}
else if (sdata[i,]$Type == "very fine and fine subangular blocky"){
sdata[i,]$Top_Type <- "blocky"}
else if (sdata[i,]$Type == "medium and coarse subangular blocky"){
sdata[i,]$Top_Type <- "blocky"}
else if (sdata[i,]$Type == "subangular blocky"){
sdata[i,]$Top_Type <- "blocky"}
else if (sdata[i,]$Type == "fine granular structure and weak very fine subangular blocky"){
sdata[i,]$Top_Type <- "granular" }
else { sdata[i,]$Top_Type <- NA }
print (paste0('******',i))
}
# 2 convert sum to 100
sdata$sum <- sdata$Percent_Sand + sdata$Percent_Silt + sdata$Percent_Clay
sdata$ratio <- 100/sdata$sum
sdata$Percent_Clay <- sdata$Percent_Clay*sdata$ratio
sdata$Percent_Silt <- sdata$Percent_Silt*sdata$ratio
sdata$Percent_Sand <- sdata$Percent_Sand*sdata$ratio
sdata$Top_Type <- as.factor(sdata$Top_Type)
return (sdata)
}
#*******************************************************************************************************
# prepare data for ANN and RF modeling
data_ann <- function (sdata) {
ann_cols <- c("Percent_Sand", "Percent_Silt", "Percent_Clay", "Ksat_cmhr", "Unsaturated_K2cm_cmhr")
sdata[, ann_cols]
}
data_rf <- function (sdata) {
dataRF <- sdata[,c(which(colnames(sdata)=="Percent_Sand")
, which(colnames(sdata)=="Percent_Silt")
, which(colnames(sdata)=="Percent_Clay")
, which(colnames(sdata)=="Grade")
, which(colnames(sdata)=="Top_Type")
, which(colnames(sdata)=="Ksat_cmhr")
, which(colnames(sdata)=="Unsaturated_K2cm_cmhr")
, which(colnames(sdata)=="BulkDensity_gcm3") )]
return(dataRF)
}
#*******************************************************************************************************
# ANN model
ann_ssc <- function(sdata) {
neuralnet(Unsaturated_K2cm_cmhr ~ Percent_Sand + Percent_Silt + Percent_Clay
,data=sdata, hidden=c(5,3), linear.output=T, stepmax=1e6)
}
#*******************************************************************************************************
# RF1 model
rf_ssc <- function(sdata) {
randomForest(Unsaturated_K2cm_cmhr ~ Percent_Sand + Percent_Silt + Percent_Clay , data=sdata,
ntree = 100,
mtry = 2,
importance = TRUE,
proximity = TRUE)
}
#*******************************************************************************************************
# RF2 model
rf_sscs <- function(sdata) {
randomForest(Unsaturated_K2cm_cmhr ~ Percent_Sand + Percent_Silt + Percent_Clay + Top_Type, data=sdata,
ntree = 100,
mtry = 2,
importance = TRUE,
proximity = TRUE)
}
#*******************************************************************************************************
# plot and test RF2
rf2_visual <- function (model) {
p1 <- qplot(1:100,model$mse ) + geom_line() + theme_bw() +
xlab ("Number of trees (n)") + ylab("MSE") +
theme(axis.text.x = element_text(face = "bold", size = 12),
axis.text.y = element_text(face = "bold", size = 12),
axis.title = element_text(face = "bold", size = 12)
)
# annotate("text", x = 8, y = 7.15, label = "( a )", size = 6)
# panel (b)
p2 <- qplot(sort(importance(model, type = 1)[1:4]), 1:4) + geom_line() + theme_bw() +
xlab ("Change of MSE (%)") +
theme(axis.text.x = element_text(face = "bold", size = 12),
axis.text.y = element_text(face = "bold", size = 12),
axis.title = element_text(face = "bold", size = 12),
axis.title.y = element_blank()
) + scale_y_continuous(labels = c("Structure", "%Silt", "%Clay", "%Sand") )
# annotate("text", x =4.25, y = 3.75, label = "( b )", size = 6)
# panel (c)
p3 <- qplot(sort(importance(model, type = 2)[1:4]), 1:4) + geom_line() + theme_bw() +
xlab ("Change of node purity") +
theme(axis.text.x = element_text(face = "bold", size = 12),
axis.text.y = element_text(face = "bold", size = 12),
axis.title = element_text(face = "bold", size = 12),
axis.title.y = element_blank()
) + scale_y_continuous(labels = c(" ", " ", " ", " ") )
# annotate("text", x = 650, y = 3.75, label = "( c )", size = 6)
panel_b <- plot_grid(p2, p3, ncol = 2)
# print(panel_b)
print(plot_grid(p1, panel_b, nrow = 2 ))
invisible(list(p1,p2,p3, panel_b))
}
#*******************************************************************************************************
# model evaluation functions
model_evaluation1 <- function (sdata) {
sdata$S_M <- sdata$kfs_m - sdata$kfs
# hist(sdata$S_M)
E <- round(sum(sdata$S_M)/length(sdata$S_M), 5)
# t-test
p <- round(t.test(sdata$S_M)$p.value, 5)
# calculate d
d <- round(1- sum(sdata$S_M^2)/sum((abs(sdata$kfs_m-mean(sdata$kfs))+abs(sdata$kfs-mean(sdata$kfs)))^2), 5)
# calculate EF
EF <- round(1- sum(sdata$S_M^2)/sum((sdata$kfs-mean(sdata$kfs))^2), 5)
# calculate RMSE
RMSE <- round((sum(sdata$S_M^2)/length(sdata$S_M))^0.5, 5)
df <- t.test(sdata$S_M)[2]
eval_mat <- data.frame(E, p, d, EF, RMSE, df)
return(eval_mat)
}
#*******************************************************************************************************
# plot figure 4
# colnames(train)
# colnames(test_rosetta)
# max(data$Unsaturated_K2cm_cmhr)
model_evaluation2 <- function () {
# a,b,c,d,e,f,g are data for panels a-h
panel_a <- qplot(kfs_m, kfs, data = train) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 50) + xlim(0, 50) +
theme(axis.title.x = element_blank()) + theme(axis.title.y = element_blank())
panel_b <- qplot(kfs_m, kfs, data = train_RF) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 50) + xlim(0, 50)+
theme(axis.title.x = element_blank())+ theme(axis.title.y = element_blank())
panel_c <- qplot(kfs_m, kfs, data = train_RF2) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 50) + xlim(0, 50)+
theme(axis.title.x = element_blank())+ theme(axis.title.y = element_blank())
panel_HSD <- qplot(kfs_m, kfs, data = train_RF2) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 50) + xlim(0, 50)+
theme(axis.title.x = element_blank())+ theme(axis.title.y = element_blank())
panel_d <- qplot(kfs_m, kfs, data = test) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 50) + xlim(0, 50)+
theme(axis.title.x = element_blank())+ theme(axis.title.y = element_blank())
panel_e <- qplot(kfs_m, kfs, data = test_RF) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 50) + xlim(0, 50)+
theme(axis.title.x = element_blank())+ theme(axis.title.y = element_blank())
panel_f <- qplot(kfs_m, kfs, data = test_RF2) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 50) + xlim(0, 50)+
theme(axis.title.x = element_blank())+ theme(axis.title.y = element_blank())
panel_g <- qplot(kfs_m, kfs, data = test_rosetta) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 50) +
theme(axis.title.x = element_blank())+ theme(axis.title.y = element_blank())
y.grob <- textGrob(expression(Measured~K[fs]~cm~h^-1),
gp=gpar(fontface="bold", col="black", fontsize=15), rot=90)
x.grob <- textGrob(expression(Predicted~K[fs]~cm~h^-1),
gp=gpar(fontface="bold", col="black", fontsize=15))
plot <- plot_grid (panel_a, panel_d, panel_b, panel_e, panel_c, panel_f, panel_HSD, panel_g, ncol = 2, nrow = 4
, labels = c('( a ) ANN train', '( e ) ANN test', '( b ) RF1 train', '( f ) RF1 test'
, '( c ) RF2 train', '( g ) RF2 test', '( d ) HSD ', '( h ) ROSETTA')
, vjust = 3, hjust = c(-0.8, -0.8, -0.8, -0.8, -0.8, -0.8, -0.8, -0.75), label_size = 8)
grid.arrange(arrangeGrob(plot, left = y.grob, bottom = x.grob))
}
#*******************************************************************************************************
# plot figure 5
model_evaluation3 <- function () {
panel_a <- qplot(Ksat_ANN, Ksat, data = histdata) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 25) + xlim(0, 25) +
theme(axis.title.x = element_blank()) + theme(axis.title.y = element_blank())
panel_b <- qplot(Ksat_RF, Ksat, data = histdata) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 25) + xlim(0, 25) +
theme(axis.title.x = element_blank()) + theme(axis.title.y = element_blank())
panel_c <- qplot(Ksat_RF2, Ksat, data = histdata) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 25) + xlim(0, 25) +
theme(axis.title.x = element_blank()) + theme(axis.title.y = element_blank())
panel_d <- qplot(Ksat_Rosseta, Ksat, data = histdata) + geom_abline(slope = 1, linetype = 2, size = 1.5, col = 'blue') +
geom_smooth(method = 'lm', col = 'red') + ylim (0, 25) +
theme(axis.title.x = element_blank()) + theme(axis.title.y = element_blank())
y.grob <- textGrob(expression(Measured~K[fs]~cm~h^-1),
gp=gpar(fontface="bold", col="black", fontsize=15), rot=90)
x.grob <- textGrob(expression(Predicted~K[fs]~cm~h^-1),
gp=gpar(fontface="bold", col="black", fontsize=15))
plot <- plot_grid (panel_a, panel_c, panel_b, panel_d, ncol = 2, nrow = 2
, labels = c('( a ) ANN', '( c ) RF2', '( b ) RF1', '( d ) ROSETTA')
, vjust = 3, hjust = c(-1.2, -1.2, -1.2, -0.7), label_size = 8 )
grid.arrange(arrangeGrob(plot, left = y.grob, bottom = x.grob))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.