knitr::opts_chunk$set(echo = TRUE) library(knitr) library(kableExtra)
I want to select 1000 samples per class. To do so, I will also consider to keep the same amount of samples per polygons, or at least the same proportion.
stat_metrics = NULL overall_accuracy = NULL for(iter in 1:3){ file_directory = 'C:\\IPSTERS\\sraster\\ins4\\sampling_0' list_files = lapply(list.files(file_directory),function(x){paste0(file_directory,"\\",x)}) group_by = 'Object' nsamples = 7000 sampling_strata <- function(file_x, nsamples, group_by ){ x = read.csv(file_x,sep = ",", header = TRUE) n_total_samples = nrow(x) samples_strata = function(y,n_total_samples,nsamples){ if(n_total_samples < nsamples) { nsamples = n_total_samples message("we have less samples that those expected") } n_rows = nrow(y) perc_samples = n_rows/n_total_samples n_samples_class = round(nsamples * perc_samples) random_index = sample(1:n_rows,size = n_samples_class,replace = FALSE) return(y[random_index,]) } #split x_split = split(x,x[,group_by]) x_split_random = lapply(x_split, samples_strata, n_total_samples, nsamples) x_random = do.call("rbind", x_split_random) return(x_random) cat(file_x) } data_random_split = lapply(list_files, sampling_strata, nsamples, group_by) #=============================== #classification random forest #=============================== #Since we have 22 polygons per class, I will select 15 for training an 7 for testing. function_train_selection <- function(x){ polygons_class = unique(x$Object) ind = sample(2, length(polygons_class), replace = TRUE, prob = c(0.7,0.3)) train_class = polygons_class[ind==1] test_class = polygons_class[ind==2] train_df = x[x$Object %in% train_class,] train_df$type = "Training" test_df = x[x$Object %in% test_class,] test_df$type = "Test" df_result = rbind(train_df,test_df) return(df_result) } data_random_split2 = lapply(data_random_split, function_train_selection) data_random = do.call("rbind",data_random_split2) #write.csv(data_random,"output5.csv") #=============================== #Removing nans #=============================== remove_na_df = function(x){ x$row <- 1:nrow(x) list_rows = split(x,x$row) list_rows_wn = lapply(list_rows,function(r){if(all(!is.na(r))){return(r)}}) result_df = do.call("rbind", list_rows_wn) result_df$row <- NULL return(result_df) } data_random2 = remove_na_df(data_random) train = data_random2[data_random3$type == "Training",-c(1,2,3,96)] test = data_random2[data_random3$type == "Test",-c(1,2,3,96)] #=============================== #Modelling #=============================== #set.seed(222) model_rf = randomForest::randomForest(Label~. , data = train,ntree = 500) print(model_rf) randomForest::importance(model_rf) #=============================== #Prediction #=============================== library(caret) pred_test = predict(model_rf, test) result_test = caret::confusionMatrix(pred_test, test$Label) #=============================== #Accuracies #=============================== stat_metrics = rbind(stat_metrics, result_test$byClass[,c(1,3,7)]) overall_accuracy = rbind(overall_accuracy,result_test$overall) cat("ready",iter) }
#gc() #rm(list=ls()) #write.csv(stat_metrics,"S3_stat_metrics_7000s_BD0.csv") #write.csv(overall_accuracy ,"S3_overall_accuracy_7000s_DB0.csv")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.