# Set chunks defaults; these options will be applied to all subsequent chunks knitr::opts_chunk$set(results = 'hide', message = TRUE, include = TRUE, echo = FALSE, fig.height = 5, fig.width = 7) # if(!file.exists(OUTPUT_DIR)) dir.create(OUTPUT_DIR) # if(!file.exists(LOG_DIR)) dir.create( LOG_DIR ) library(cowplot) library(grid) library(gridExtra) # install.packages("soiltexture") # library( soiltexture ) library("ggpubr") library(ggplot2) # install.packages("randomForest") library(randomForest) # install.packages('neuralnet') library(neuralnet) library(reshape) library(scales) library(devtools) library(dplyr) library(readxl) library(patchwork) source('functions.R') theme_set(theme_bw())
read_xlsx <- function(x, n_sheet, n_skip) read_excel(file.path(DATA_DIR, x), sheet = n_sheet, skip = n_skip)
# Updated 2020 Dec All_cite_new <- update_urban_kfs() # write.csv(All_cite_new, "../extdata/AllCities_Victoria_RDS.csv", row.names = F) All_cite_new %>% filter(!is.na(Unsaturated_K2cm_cmhr)) All_cite_new %>% mutate(BulkDensity_gcm3 = Bulk_Density_Avg_g_cm3) -> All_cite_new All_cite_new %>% filter(!is.na(BulkDensity_gcm3)) %>% select(BulkDensity_gcm3) All_cite_new[All_cite_new$Unsaturated_K2cm_cmhr < 0.004 & !is.na(All_cite_new$Unsaturated_K2cm_cmhr), ]$Unsaturated_K2cm_cmhr <- 0.0042 # write.csv(All_cite_new, "../extdata/AllCities_Victoria_RDS_rock_bd.csv", row.names = F)
# DATA_DIR <- here::here("extdata") # raw_data <- read.csv(here::here("extdata/AllCities_Victoria_RDS_rock.csv")) # bd_data <- read_xlsx('BDdata.xlsx', n_sheet = 1, n_skip = 0)
# bd_data %>% # select(SampleLayer_ID) %>% # count(SampleLayer_ID) %>% # arrange(n) # # raw_data %>% # select(SampleLayer_ID) %>% # count(SampleLayer_ID) %>% # arrange(n) # # raw_data %>% # select(BulkDensity_gcm3) %>% # na.omit() # # all_city_joint_bd <- left_join(raw_data %>% select(-BulkDensity_gcm3), # bd_data %>% select(SampleLayer_ID, BulkDensity_gcm3), by = "SampleLayer_ID") # # all_city_joint_bd %>% # select(BulkDensity_gcm3) %>% # na.omit() %>% # arrange(BulkDensity_gcm3)
comb_data <- function(){ # Read and clean data histdata <- read.csv(here::here("extdata/UrbanSoilK_V3.csv")) histdata %>% select(Percent_Clay, Percent_Sand, Percent_Silt, Ksat) -> subhist head(subhist) colnames(subhist) <- c("CLAY", "SILT", "SAND", "Unsaturated_K2cm_cmhr") subhist$col <- "red" # resetta test data test_rosetta <- read.csv(here::here("extdata/test_with_rosetta_V3.csv")) test_rosetta$Model <- 'ROSETTA' # filt and clean data for soil triangle plot (Figure 1) data_orig <- read.csv(here::here("extdata/AllCities_Victoria_RDS_rock_bd.csv")) data_orig %>% mutate(sum = Percent_Clay + Percent_Silt + Percent_Sand) %>% filter(sum > 99 & sum < 101) -> data_orig combdata <- clean_data(data_orig) combdata %>% select(CLAY, SILT, SAND, Unsaturated_K2cm_cmhr) -> combdata combdata$col <- "blue" colnames(combdata) <- colnames (subhist) combdata <- rbind(combdata, subhist) return(combdata) } combdata <- comb_data() combdata$Unsaturated_K2cm_cmhr %>% min() combdata$Unsaturated_K2cm_cmhr %>% max() combdata$Unsaturated_K2cm_cmhr %>% mean() # TT.plot does not work in the updated R # tiff(paste("outputs/Figure1-1.tiff"), width = 10, height = 10, pointsize = 1/300, units = 'in', res = 300) # plot soil texture figure # TT.plot( # class.sys = "USDA.TT", # tri.data = combdata, # # z.name = "OC", # grid.col = "white", # grid.lty = 1, # arrows.show = TRUE, # pch = 1, # col = combdata$col, # bg = "white", # fg = "blue", # main = "Soil texture triangle plot" # ) # dev.off() histdata <- read.csv(here::here("extdata/UrbanSoilK_V3.csv")) sd(histdata$Ksat) mean(histdata$Ksat) mean(histdata$Ksat_ANN) mean(histdata$Ksat_RF) mean(histdata$Ksat_RF2) mean(histdata$Ksat_Rosseta)
combdata %>% mutate(log_kfs = log(Unsaturated_K2cm_cmhr)) -> combdata ggplot(combdata, aes(log_kfs, stat(density), fill = col)) + # geom_freqpoly() geom_histogram(binwidth = 0.25, col = 'gray') + # geom_density()+ theme(legend.position = "none", legend.background = element_rect(fill = alpha('white',0.0)) , axis.title = element_text(size = 20) , axis.text = element_text(size = 20)) + scale_fill_manual(values = c("blue", "red")) + # scale_fill_discrete(name = "Data", labels=c('Training', 'Verification')) + xlab(expression( Log~"["~k[n]~(cm~h^{-1})~"]" )) + ylab('Density') # plot_grid(p1, p2, labels = c('( a ), ( b )'), vjust = 4, hjust = c(-2,-2)) # ggsave("Figure1-2.jpg", width = 7.5, height = 7.5, dpi = 300, units = "in")
data_orig <- update_structure(read.csv(here::here("extdata/AllCities_Victoria_RDS_rock_bd.csv"))) data_orig %>% select(BulkDensity_gcm3) %>% na.omit() data_orig %>% select(Unsaturated_K2cm_cmhr) %>% na.omit() %>% arrange(Unsaturated_K2cm_cmhr) # data_orig %>% # select(Percent_Sand, Percent_Silt, Percent_Clay, Ksat_cmhr, Type, BulkDensity_gcm3) %>% # na.omit() data_orig %>% select(Type) %>% unique() data_orig
data_structure <- update_structure(data_orig) data <- data_ann (data_structure) maxs <- apply(data, 2, max) mins <- apply(data, 2, min) scaled <- as.data.frame(scale(data, center = mins, scale = maxs - mins)) index <- sample(1:nrow(data),round(0.85*nrow(data))) train <- scaled[index,] test <- scaled[-index,] ann_Ksat <- ann_ssc(train) train$p.ann_Ksat <- neuralnet::compute(ann_Ksat, train[,c('Percent_Sand' , 'Percent_Silt' , 'Percent_Clay')])$net.result test$p.ann_Ksat <- neuralnet::compute(ann_Ksat, test[,c('Percent_Sand' , 'Percent_Silt' , 'Percent_Clay')])$net.result # scale back the value train$kfs <- train$Unsaturated_K2cm_cmhr * (max(data$Unsaturated_K2cm_cmhr)-min(data$Unsaturated_K2cm_cmhr)) + min(data$Unsaturated_K2cm_cmhr) train$kfs_m <- train$p.ann_Ksat * (max(data$Unsaturated_K2cm_cmhr)-min(data$Unsaturated_K2cm_cmhr)) + min(data$Unsaturated_K2cm_cmhr) train$Model <- 'ANN' # test data test$kfs <- test$Unsaturated_K2cm_cmhr * (max(data$Unsaturated_K2cm_cmhr)-min(data$Unsaturated_K2cm_cmhr)) + min(data$Unsaturated_K2cm_cmhr) test$kfs_m <- test$p.ann_Ksat * (max(data$Unsaturated_K2cm_cmhr)-min(data$Unsaturated_K2cm_cmhr)) + min(data$Unsaturated_K2cm_cmhr) test$Model <- 'ANN' # correlation cor(train$kfs_m, train$kfs, method = c("spearman")) cor(test$kfs_m, test$kfs, method = c("spearman"))
data_structure %>% select(Top_Type, Type) %>% filter(is.na(Top_Type))
unique(data_structure$Top_Type)
dataRF <- data_rf (data_structure) train_RF <- dataRF[index,] test_RF <- dataRF[-index,] rf1 <- rf_ssc(train_RF) # train dataset train_RF$kfs_m <- predict(rf1, train_RF) train_RF$Model <- 'rf1' # test dataset test_RF$kfs_m <- predict(rf1, test_RF) test_RF$Model <- 'rf1' cor(train_RF$kfs_m, train_RF$Unsaturated_K2cm_cmhr, method = c("spearman")) cor(test_RF$kfs_m, test_RF$Unsaturated_K2cm_cmhr, method = c("spearman"))
set.seed(20191120) rf2 <- rf_sscs(data_structure)
set.seed(20191120) importance(rf2, type = 1) # getwd() # pdf("Figure A2.pdf", height = 6, width = 8) rf2_visual(model = rf2) # dev.off() # tiff(paste("Figure A2.tiff"), width = 8, height = 6, pointsize = 1/300, units = 'in', res = 300) rf2_visual(model = rf2) # dev.off()
# rf 3 data_orig %>% select(Percent_Sand, Percent_Silt, Percent_Clay, Percent_Rock_Fragment, Top_Type, Unsaturated_K2cm_cmhr) %>% na.omit() -> data_rock set.seed(20200602) rf3 <- randomForest(Unsaturated_K2cm_cmhr ~ Percent_Sand + Percent_Silt + Percent_Clay + Percent_Rock_Fragment + Top_Type, data=data_rock, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) importance(rf3, type = 1) # panel a qplot(1:100, rf3$mse ) + geom_line() + theme_bw() + xlab ("Number of trees (n)") + ylab(" MSE ") + theme(axis.text.x = element_text(face = "bold", size = 12), axis.text.y = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold", size = 12), axis.title.y = element_text(angle = 0, vjust = 0.5) ) -> p_a # panel b qplot(sort(importance(rf3, type = 1)[1:5]), 1:5) + geom_line() + theme_bw() + xlab ("Change of MSE (%)") + theme(axis.text.x = element_text(face = "bold", size = 12), axis.text.y = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold", size = 12), axis.title.y = element_blank()) + scale_y_continuous(labels = c("Structure", "%Rock", "%Silt", "%Clay", "%Sand") )-> p_b # panel c qplot(sort(importance(rf3, type = 2)[1:5]), 1:5) + geom_line() + theme_bw() + xlab ("Change of node purity") + theme(axis.text.x = element_text(face = "bold", size = 12), axis.text.y = element_text(face = "bold", size = 12), axis.title = element_text(face = "bold", size = 12), axis.title.y = element_blank()) + scale_y_continuous(labels = c(" ", " ", " ", " ", "") )-> p_c panel_b <- plot_grid(p_b, p_c, ncol = 2, labels = c("b", "c"), hjust = c(-9.5, -4), vjust = 2.75, rel_widths = c(1.15, 1)) print(plot_grid(p_a, panel_b, nrow = 2, labels = c("a", "") , hjust = -10, vjust = 2.75 ) ) ggsave("Figure A2.tiff", width = 8, height = 6, dpi = 300, units = "in")
# no rock inputs, but ssc scaled to 100% rock_test_data <- clean_data(data_orig) rock_test_data %>% filter(!is.na(ROCK)) %>% filter(Unsaturated_K2cm_cmhr>0) %>% mutate(Unsaturated_K2cm_cmhr = log(Unsaturated_K2cm_cmhr)) -> rock_test_data rock_test1 <- randomForest(Unsaturated_K2cm_cmhr ~ SAND + SILT + CLAY, data=rock_test_data, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) rock_test_data$rock_m1 <- predict(rock_test1, rock_test_data) cor(rock_test_data$rock_m1, rock_test_data$Unsaturated_K2cm_cmhr, method = c("spearman")) summary(lm(rock_test_data$Unsaturated_K2cm_cmhr ~ rock_test_data$rock_m1))$r.squared %>% round(2) -> var_r1
# sscr: with sand + silt + clay = 100% rock_test2 <- randomForest(Unsaturated_K2cm_cmhr ~ SAND + SILT + CLAY + ROCK, data=rock_test_data, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) # train dataset rock_test_data$rock_m2 <- predict(rock_test2, rock_test_data) cor(rock_test_data$rock_m2, rock_test_data$Unsaturated_K2cm_cmhr, method = c("spearman")) summary(lm(rock_test_data$Unsaturated_K2cm_cmhr ~ rock_test_data$rock_m2))$r.squared %>% round(2)-> var_r2
# sscr: with sand + silt + clay + rock = 100% rock_test_data3 <- clean_data_rock(data_orig) rock_test_data3 %>% filter(Unsaturated_K2cm_cmhr>0) %>% mutate(Unsaturated_K2cm_cmhr = log(Unsaturated_K2cm_cmhr)) -> rock_test_data3 rock_test_data3 %>% select(CLAY, SILT, SAND, ROCK) %>% mutate(check = 100-CLAY-SILT-SAND-ROCK) %>% arrange(check) rock_test3 <- randomForest(Unsaturated_K2cm_cmhr ~ SAND + SILT + CLAY + ROCK, data=rock_test_data3, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) # train dataset rock_test_data3$rock_m3 <- predict(rock_test3, rock_test_data3) cor(rock_test_data3$rock_m3, rock_test_data3$Unsaturated_K2cm_cmhr, method = c("spearman")) summary(lm(rock_test_data3$Unsaturated_K2cm_cmhr ~ rock_test_data3$rock_m3))$r.squared %>% round(2)-> var_r3
# sscr: with no scaled sscr data_orig %>% select(Unsaturated_K2cm_cmhr, Percent_Sand, Percent_Clay, Percent_Silt, Percent_Rock_Fragment, Rock_group, Rock_Fragment_Type) %>% na.omit() %>% filter(Unsaturated_K2cm_cmhr > 0) %>% mutate(Unsaturated_K2cm_cmhr = log(Unsaturated_K2cm_cmhr)) -> rock_test_data4 rock_test4 <- randomForest(Unsaturated_K2cm_cmhr ~ Percent_Sand + Percent_Clay + Percent_Silt + Percent_Rock_Fragment, data=rock_test_data4, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) # train dataset rock_test_data4$rock_m4 <- predict(rock_test4, rock_test_data4) cor(rock_test_data4$rock_m4, rock_test_data4$Unsaturated_K2cm_cmhr, method = c("spearman")) summary(lm(rock_test_data4$Unsaturated_K2cm_cmhr ~ rock_test_data4$rock_m4))$r.squared %>% round(2) -> var_r4
# with rock group as input # sscrg: with sand + silt + clay = 100% rock_test_data$Rock_group <- ifelse(is.na(rock_test_data$Rock_group), "NotR", as.character(rock_test_data$Rock_group)) rock_test_data$Rock_group <- as.factor(rock_test_data$Rock_group) rock_test5 <- randomForest(Unsaturated_K2cm_cmhr ~ SAND + SILT + CLAY + Rock_group, data=rock_test_data, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) # train dataset rock_test_data$rock_m5 <- predict(rock_test5, rock_test_data) cor(rock_test_data$rock_m5, rock_test_data$Unsaturated_K2cm_cmhr, method = c("spearman")) summary(lm(rock_test_data$Unsaturated_K2cm_cmhr ~ rock_test_data$rock_m5))$r.squared %>% round(2)-> var_r5
# with BD as input rock_test6 <- randomForest(Unsaturated_K2cm_cmhr ~ SAND + SILT + CLAY + BD, data=rock_test_data, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) # train dataset rock_test_data$rock_m6 <- predict(rock_test6, rock_test_data) cor(rock_test_data$rock_m6, rock_test_data$Unsaturated_K2cm_cmhr, method = c("spearman")) summary(lm(rock_test_data$Unsaturated_K2cm_cmhr ~ rock_test_data$rock_m6))$r.squared %>% round(2)-> var_r6
# ssc+structure rock_test7 <- randomForest(Unsaturated_K2cm_cmhr ~ SAND + SILT + CLAY + Top_Type, data=rock_test_data, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) # train dataset rock_test_data$rock_m7 <- predict(rock_test7, rock_test_data) cor(rock_test_data$rock_m7, rock_test_data$Unsaturated_K2cm_cmhr, method = c("spearman")) summary(lm(rock_test_data$Unsaturated_K2cm_cmhr ~ rock_test_data$rock_m7))$r.squared %>% round(2)-> var_r7
# sscr+structure rock_test8 <- randomForest(Unsaturated_K2cm_cmhr ~ SAND + SILT + CLAY + ROCK + Top_Type, data=rock_test_data3, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) # train dataset rock_test_data3$rock_m8 <- predict(rock_test8, rock_test_data3) cor(rock_test_data3$rock_m8, rock_test_data3$Unsaturated_K2cm_cmhr, method = c("spearman")) summary(lm(rock_test_data3$Unsaturated_K2cm_cmhr ~ rock_test_data3$rock_m8))$r.squared %>% round(2)-> var_r8
bind_rows(rock_test_data %>% mutate(Model = paste0("(a) ssc (ssc=100%, R^2=",var_r1,")"), Predict = rock_m1) %>% select(Unsaturated_K2cm_cmhr, Predict, Model), rock_test_data %>% mutate(Model = paste0("(b) sscr (ssc=100%, R^2=",var_r2,")"), Predict = rock_m2) %>% select(Unsaturated_K2cm_cmhr, Predict, Model), rock_test_data3 %>% mutate(Model = paste0("(c) sscr (sscr=100%, R^2=",var_r3,")"), Predict = rock_m3) %>% select(Unsaturated_K2cm_cmhr, Predict, Model), rock_test_data4 %>% mutate(Model = paste0("(d) sscr (no scale, R^2=",var_r4,")"), Predict = rock_m4) %>% select(Unsaturated_K2cm_cmhr, Predict, Model), rock_test_data %>% mutate(Model = paste0("(e) sscg (ssc=100%, R^2=",var_r5,")"), Predict = rock_m5) %>% select(Unsaturated_K2cm_cmhr, Predict, Model), rock_test_data %>% mutate(Model = paste0("(f) sscbd (ssc=100%, R^2=",var_r6,")"), Predict = rock_m6) %>% select(Unsaturated_K2cm_cmhr, Predict, Model), rock_test_data %>% mutate(Model = paste0("(g) sscstr (ssc=100%, R^2=",var_r7,")"), Predict = rock_m7) %>% select(Unsaturated_K2cm_cmhr, Predict, Model), rock_test_data3 %>% mutate(Model = paste0("(h) sscrstr (ssc=100%, R^2=",var_r8,")"), Predict = rock_m8) %>% select(Unsaturated_K2cm_cmhr, Predict, Model) ) %>% ggplot(aes(Predict, Unsaturated_K2cm_cmhr)) + geom_point(alpha = 0.5) + geom_smooth(method = "lm") + facet_wrap(. ~ Model, nrow = 4) + labs(x = expression(Predicted~k[n]~(cm~hr^{-1}~","~log~transfered)), y = expression(Measured~k[n]~(cm~hr^{-1}~","~log~transfered))) # ggsave("Figure A3.jpg", width = 6, height = 7, dpi = 300, units = "in")
# stepwise # library(MASS) clean_data_rock(data_orig) %>% na.omit() %>% dplyr::select(CLAY, SILT, SAND, Unsaturated_K2cm_cmhr, ROCK, BD) -> data_structure2 # Fit the full model full.model <- lm(Unsaturated_K2cm_cmhr ~., data = data_structure2) # Stepwise regression model step.model <- MASS::stepAIC(full.model, direction = "both", trace = FALSE) summary(step.model) summary(full.model)
# all_city_joint_bd %>% # select(Percent_Sand, Percent_Silt, Percent_Clay, Unsaturated_K2cm_cmhr, Type) %>% # mutate(ssc = Percent_Sand + Percent_Silt + Percent_Clay) %>% # na.omit() %>% # arrange(ssc)
rock_test_data4 %>% ggplot(aes(Percent_Rock_Fragment, Unsaturated_K2cm_cmhr, col = Rock_group)) + geom_point() + # geom_hex(bins = 30) + # scale_fill_distiller(palette = "YlGnBu", name = "Count") + labs(x = expression(Rock~fragment~("%")), y = expression(Log~"["~k[n]*","~(cm~h^{-1})~"]")) + scale_color_discrete(name = element_blank()) + # scale_fill_continuous(name = element_blank()) + guides(fill = FALSE) + theme(legend.position = "right") -> p1 # density 1 panel rock_test_data4 %>% ggplot(aes(Percent_Rock_Fragment)) + geom_histogram(bins = 50, fill = "white", col = "black") + theme_void() -> dens1 # density 2 panel rock_test_data4 %>% ggplot(aes(Unsaturated_K2cm_cmhr)) + geom_histogram(bins = 25, fill = "white", col = "black") + theme_void() + coord_flip() -> dens2 dens1 + plot_spacer() + p1 + dens2 + plot_layout( ncol = 2, nrow = 2, widths = c(4, 1), heights = c(1, 4) ) -> final_plot print(final_plot) # ggsave("Figure A4.jpg", width = 6, height = 4, dpi = 300, units = "in") rock_test_data4$Percent_Rock_Fragment %>% max()
data_orig %>% filter(!is.na(Unsaturated_K2cm_cmhr) & !is.na(Percent_Sand) & !is.na(Type) & !is.na(Texture) & !is.na(BulkDensity_gcm3)) %>% select(BulkDensity_gcm3, Unsaturated_K2cm_cmhr, Percent_Sand, Percent_Silt, Percent_Clay, BulkDensity_gcm3, Texture) -> bd_test_data bd_test_data %>% mutate(log_kfs = log(Unsaturated_K2cm_cmhr)) -> bd_test_data lm(bd_test_data$log_kfs ~ bd_test_data$BulkDensity_gcm3) %>% summary() # density 1 panel bd_test_data %>% ggplot(aes(BulkDensity_gcm3)) + geom_histogram(bins = 30, fill = "white", col = "black") + theme_void() -> dens_bd1 # density 2 panel bd_test_data %>% ggplot(aes(log_kfs)) + geom_histogram(bins = 20, fill = "white", col = "black") + theme_void() + coord_flip() -> dens_bd2 bd_test_data %>% ggplot(aes(BulkDensity_gcm3, log_kfs)) + # geom_point() + geom_hex(col = "gray", bins = 25) + scale_fill_distiller(palette = "YlGnBu", name = "Count") + labs(x = expression(Bulk~density~(g~cm^{-3})), y = expression(Log~"["~k[n]*","~(cm~h^{-1})~"]")) -> p2 dens_bd1 + plot_spacer() + p2 + dens_bd2 + dens1 + plot_spacer() + p1 + dens2 + plot_layout( ncol = 2, nrow = 4, widths = c(4, 1), heights = c(1, 4) ) -> final_plot_bd print(final_plot_bd) # ggsave("Figure2-BD.jpg", width = 6, height = 7, dpi = 300, units = "in")
histdata wilcox.test(histdata$Ksat, histdata$Ksat_ANN, mu = 0, paired = TRUE, alternative = "two.sided", conf.int = TRUE, conf.level = 0.95) wilcox.test(histdata$Ksat, histdata$Ksat_RF, mu = 0, paired = TRUE, alternative = "two.sided", conf.int = TRUE, conf.level = 0.95) wilcox.test(histdata$Ksat, histdata$Ksat_RF2, mu = 0, paired = TRUE, alternative = "two.sided", conf.int = TRUE, conf.level = 0.95) wilcox.test(histdata$Ksat, histdata$Ksat_Rosseta, mu = 0, paired = TRUE, alternative = "two.sided", conf.int = TRUE, conf.level = 0.95)
## ssc: with no scaled ssc rf_no_bd <- randomForest(log_kfs ~ Percent_Sand + Percent_Clay + Percent_Silt, data=bd_test_data, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) ## train dataset bd_test_data$rf_no_bd <- predict(rf_no_bd, bd_test_data) bd_test_data %>% mutate(S_M1 = log_kfs - rf_no_bd) -> bd_test_data cor(bd_test_data$rf_no_bd, bd_test_data$log_kfs, method = c("spearman")) slr_rf_np_bd <- lm(bd_test_data$log_kfs ~ bd_test_data$rf_no_bd) tibble(inter = summary(slr_rf_np_bd)$coefficients[1,1] %>% round(3), slope = summary(slr_rf_np_bd)$coefficients[2,1] %>% round(3), r2 = summary(slr_rf_np_bd)$adj.r.squared %>% round(2), E = sum(bd_test_data$S_M1) / length(bd_test_data$S_M1) %>% round(3), #E RMSE = (sum(bd_test_data$S_M1^2)/length(bd_test_data$S_M1))^0.5 %>% round(3), d = (1- sum(bd_test_data$S_M1^2)/sum((abs(bd_test_data$log_kfs-mean(bd_test_data$rf_no_bd))+ abs(bd_test_data$rf_no_bd-mean(bd_test_data$rf_no_bd)))^2)) %>% round(3) ) ## sscbd: with no scaled ssc rf_wt_bd <- randomForest(log_kfs ~ Percent_Sand + Percent_Clay + Percent_Silt + BulkDensity_gcm3, data=bd_test_data, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) ## train dataset bd_test_data$rf_wt_bd <- predict(rf_wt_bd, bd_test_data) cor(bd_test_data$rf_wt_bd, bd_test_data$log_kfs, method = c("spearman")) bd_test_data %>% mutate(S_M3 = log_kfs - rf_wt_bd) -> bd_test_data slr_rf_wt_bd <- lm(bd_test_data$log_kfs ~ bd_test_data$rf_wt_bd) tibble(inter = summary(slr_rf_wt_bd)$coefficients[1,1] %>% round(3), slope = summary(slr_rf_wt_bd)$coefficients[2,1] %>% round(3), r2 = summary(slr_rf_wt_bd)$adj.r.squared %>% round(2), E = sum(bd_test_data$S_M3) / length(bd_test_data$S_M3) %>% round(3), #E RMSE = (sum(bd_test_data$S_M3^2)/length(bd_test_data$S_M3))^0.5 %>% round(3), d = (1- sum(bd_test_data$S_M3^2)/sum((abs(bd_test_data$log_kfs-mean(bd_test_data$rf_wt_bd))+ abs(bd_test_data$rf_wt_bd-mean(bd_test_data$rf_wt_bd)))^2)) %>% round(3) ) ## ssc: with no scaled ssc, ann model ann_no_bd <- neuralnet(log_kfs ~ Percent_Sand + Percent_Silt + Percent_Clay, data=bd_test_data, hidden=c(5,3), linear.output=T, stepmax=1e6) ## train dataset bd_test_data$ann_no_bd <- neuralnet::compute(ann_no_bd, bd_test_data[,c('Percent_Sand' , 'Percent_Silt' , 'Percent_Clay')])$net.result cor(bd_test_data$ann_no_bd, bd_test_data$log_kfs, method = c("spearman")) slr_ann_np_bd <- lm(bd_test_data$log_kfs ~ bd_test_data$ann_no_bd) bd_test_data %>% mutate(S_M2 = log_kfs - ann_no_bd) -> bd_test_data tibble(inter = summary(slr_ann_np_bd)$coefficients[1,1] %>% round(3), slope = summary(slr_ann_np_bd)$coefficients[2,1] %>% round(3), r2 = summary(slr_ann_np_bd)$adj.r.squared %>% round(2), E = sum(bd_test_data$S_M2) / length(bd_test_data$S_M2) %>% round(3), #E RMSE = (sum(bd_test_data$S_M2^2)/length(bd_test_data$S_M2))^0.5 %>% round(3), d = (1- sum(bd_test_data$S_M2^2)/sum((abs(bd_test_data$log_kfs-mean(bd_test_data$ann_no_bd))+ abs(bd_test_data$ann_no_bd-mean(bd_test_data$ann_no_bd)))^2)) %>% round(3) ) ## sscbd: with no scaled ssc, ann model ann_wt_bd <- neuralnet(log_kfs ~ Percent_Sand + Percent_Silt + Percent_Clay + BulkDensity_gcm3, data=bd_test_data, hidden=c(5,3), linear.output=T, stepmax=1e6) ## train dataset bd_test_data$ann_wt_bd <- neuralnet::compute(ann_wt_bd, bd_test_data[,c('Percent_Sand' , 'Percent_Silt' , 'Percent_Clay', 'BulkDensity_gcm3')])$net.result slr_ann_wt_bd <- lm(bd_test_data$log_kfs ~ bd_test_data$ann_wt_bd) bd_test_data %>% mutate(S_M3 = log_kfs - ann_wt_bd) -> bd_test_data tibble(inter = summary(slr_ann_wt_bd)$coefficients[1,1] %>% round(3), slope = summary(slr_ann_wt_bd)$coefficients[2,1] %>% round(3), r2 = summary(slr_ann_wt_bd)$adj.r.squared %>% round(2), E = sum(bd_test_data$S_M3) / length(bd_test_data$S_M3) %>% round(3), #E RMSE = (sum(bd_test_data$S_M3^2)/length(bd_test_data$S_M3))^0.5 %>% round(3), d = (1- sum(bd_test_data$S_M3^2)/sum((abs(bd_test_data$log_kfs-mean(bd_test_data$ann_wt_bd))+ abs(bd_test_data$ann_wt_bd-mean(bd_test_data$ann_wt_bd)))^2)) %>% round(3) )
# response to review library(ggridges) r2results <- read.csv(here::here("vignettes/r2evaluation.csv")) r2results %>% dplyr::select(Group, R2, lnRMSE) %>% dplyr::rename("(a)" = R2, "(b)" = lnRMSE) %>% tidyr::gather(-Group, key = "Key", value = "Value") %>% ggplot(aes(Value, Group, fill = 0.5 - abs(0.5 - stat(ecdf)))) + stat_density_ridges(geom = "density_ridges_gradient", calc_ecdf = TRUE) + scale_fill_viridis_c(name = "Probability", direction = -1) + facet_grid(cols = vars(Key), scales = "free") + xlab(expression(" "~R^{2}~" log ["~RMSE~(cm~hr^{-1})~"]")) + ylab(element_blank()) + scale_y_discrete(labels= c("Other papers", "This study")) # ggsave("Figure5.jpg", width = 6, height = 3, dpi = 300, units = "in")
rf_no_bd_SC <- randomForest(log_kfs ~ Percent_Sand + Percent_Clay , data=bd_test_data, ntree = 100, mtry = 2, importance = TRUE, proximity = TRUE) bd_test_data$rf_no_bd_SC <- predict(rf_no_bd_SC, bd_test_data) cor(bd_test_data$rf_no_bd_SC, bd_test_data$log_kfs, method = c("spearman")) lm(bd_test_data$rf_no_bd_SC ~ bd_test_data$rf_no_bd) %>% summary()
bd_test_data %>% ggplot(aes(rf_no_bd, rf_no_bd_SC)) + geom_point() + geom_smooth(method = "lm") + geom_abline(slope = 1, intercept = 0, col = "red", linetype = "dotted") + labs(x = expression("Sand+Silt+Clay"~predicted~k[n]~(cm~hr^{-1}~","~log~transfered)), y = expression("Sand+Clay"~predicted~k[n]~(cm~hr^{-1}~","~log~transfered))) + annotate("text", x = -0.5, y = 2, label = "italic(R) ^ 2 == 0.97", parse = TRUE)
log(2.78) exp(log(2.78)) log10(2.78) 10^(log10(2.78))
# compare PTF of this study with ROSETTA PTFvsROSETTA <- read.csv(here::here("vignettes/PTFs-vs-ROSETTA.csv")) PTFvsROSETTA %>% dplyr::select(Group, Kfs) %>% mutate(Kfs = log(Kfs)) %>% ggplot(aes(Kfs, Group, fill = 0.5 - abs(0.5 - stat(ecdf)))) + stat_density_ridges(geom = "density_ridges_gradient", calc_ecdf = TRUE) + scale_fill_viridis_c(name = "Probability", direction = -1) + xlab(expression(k[n]~(cm~hr^{-1}))) + ylab(element_blank()) + scale_y_discrete(labels= c("(e) ROSETTA", "(d) RF-with-structure-no-rock", "(c) RF-no-structure-no-rock", "(b) ANN-no-structure-no-rock", "(a) Measured"))
# compare DR and minidisk kfs All_cite_new %>% filter(!is.na(K_minidisk_cmhr)) %>% select(K_minidisk_cmhr, `K_DR (cm/h)`) %>% ggplot(aes(K_minidisk_cmhr, `K_DR (cm/h)`)) + geom_point() + geom_smooth(method = "lm")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.