knitr::opts_chunk$set(echo = TRUE)
library(knitr)
library(kableExtra)

Introduction

I want to select 1000 samples per class. To do so, I will also consider to keep the same amount of samples per polygons, or at least the same proportion.

Importing data

stat_metrics = NULL
overall_accuracy = NULL


for(iter in 1:3){

file_directory = 'C:\\IPSTERS\\sraster\\ins4\\sampling_0'
list_files = lapply(list.files(file_directory),function(x){paste0(file_directory,"\\",x)})

group_by = 'Object'
nsamples = 7000

sampling_strata <- function(file_x, nsamples, group_by ){
  x = read.csv(file_x,sep = ",", header = TRUE)
  n_total_samples = nrow(x)

  samples_strata = function(y,n_total_samples,nsamples){
    if(n_total_samples < nsamples)
    {
      nsamples = n_total_samples
      message("we have less samples that those expected")
    }
    n_rows = nrow(y)
    perc_samples = n_rows/n_total_samples
    n_samples_class = round(nsamples * perc_samples)
    random_index = sample(1:n_rows,size = n_samples_class,replace = FALSE)
    return(y[random_index,])
    }

  #split
  x_split = split(x,x[,group_by])
  x_split_random = lapply(x_split, samples_strata, n_total_samples, nsamples)
  x_random = do.call("rbind", x_split_random)
  return(x_random)
  cat(file_x)
}

data_random_split = lapply(list_files, sampling_strata, nsamples, group_by)

#===============================
#classification random forest
#===============================
#Since we have 22 polygons per class, I will select 15 for training an 7 for testing. 
function_train_selection <- function(x){
  polygons_class = unique(x$Object)
  ind = sample(2, length(polygons_class), replace = TRUE, prob = c(0.7,0.3))
  train_class = polygons_class[ind==1]
  test_class = polygons_class[ind==2]
  train_df = x[x$Object %in% train_class,]
  train_df$type = "Training"
  test_df = x[x$Object %in% test_class,]
  test_df$type = "Test"
  df_result = rbind(train_df,test_df)
  return(df_result)
}

data_random_split2 = lapply(data_random_split, function_train_selection)

data_random = do.call("rbind",data_random_split2)
#write.csv(data_random,"output5.csv")


#===============================
#Removing nans
#===============================

remove_na_df = function(x){
  x$row <- 1:nrow(x)
  list_rows = split(x,x$row)
  list_rows_wn = lapply(list_rows,function(r){if(all(!is.na(r))){return(r)}})
  result_df = do.call("rbind", list_rows_wn)
  result_df$row <- NULL
  return(result_df)
}

data_random2 = remove_na_df(data_random)

train = data_random2[data_random3$type == "Training",-c(1,2,3,96)]
test = data_random2[data_random3$type == "Test",-c(1,2,3,96)]

#===============================
#Modelling
#===============================
#set.seed(222)
model_rf = randomForest::randomForest(Label~. , data = train,ntree = 500)
print(model_rf)
randomForest::importance(model_rf)

#===============================
#Prediction
#===============================

library(caret)
pred_test = predict(model_rf, test)
result_test = caret::confusionMatrix(pred_test, test$Label)

#===============================
#Accuracies
#===============================

stat_metrics = rbind(stat_metrics, result_test$byClass[,c(1,3,7)])
overall_accuracy = rbind(overall_accuracy,result_test$overall)


cat("ready",iter)
}
#gc()
#rm(list=ls())

#write.csv(stat_metrics,"S3_stat_metrics_7000s_BD0.csv")
#write.csv(overall_accuracy ,"S3_overall_accuracy_7000s_DB0.csv")


MapSentinel/sraster documentation built on April 24, 2020, 5:43 a.m.