WIP/Old_and_various/Phenorice_Statistics_Simple.R

# TODO: Add comment
# 
# Author: LB
###############################################################################



# TODO: Add comment
# 
# Author: LB
###############################################################################

LB_Create_pol_grid = function(in_raster_file, out_res, out_folder, out_name_shp,grid_extent){
	
	library(rgdal)
	library(raster)
	rast<- raster(in_raster_file)
#	bb <- bbox(rast)                          # Define Bounding box
	cs <- c(out_res,out_res)  # cell size. To be set by hand! 
	
	cc <- c(grid_extent$xmin,grid_extent$ymin) + (cs/2)  # cell offset   # move 1/2 pixel if necessary
	
	# compute number of cells per direction
	if (cs < 300) {				# needed to avoid that larger grids cover only one portion of the area --> so, add a row/coulmn for larger grids
		cd <- ceiling(c(((grid_extent$xmax-grid_extent$xmin)/cs[1]),((grid_extent$ymax-grid_extent$ymin)/cs[2])))-c(0,1)
	} else {
		cd <- ceiling(c(((grid_extent$xmax-grid_extent$xmin)/cs[1]),((grid_extent$ymax-grid_extent$ymin)/cs[2])))
	}
	grd <- GridTopology(cellcentre.offset=cc, cellsize=cs, cells.dim=cd)   # Define grd characteristics
	
	#transform to spatial grid
	sp_grd <- SpatialGridDataFrame(grd,
			data=data.frame(id=1:prod(cd)),
			proj4string=CRS(proj4string(rast)))
	summary(sp_grd)
#	browser()
	sp_polygons = as(sp_grd, "SpatialPolygonsDataFrame")#create the shape
	print('Creating MODIS Grid - Please Wait !')
	writeOGR(sp_polygons,dsn = out_folder, layer =out_name_shp,driver="ESRI Shapefile", overwrite_layer = T) #save the shapefile
	gc()
}

library(ggplot2)
library(reshape)
library(data.table)
library(rgdal)
library(SDMTools)
library(plyr)
library(gdalUtils)
library(raster)
library(foreign)
library(tools)
memory.limit(16000)
{{
		
# Define inputs and outputs
		main_folder = 'Z:/Processing_v21/old_min_identification/Outputs' 
		results_folder = file.path(main_folder,'ndii_new/2013/raster')			# Folder containinf phenorice results
		outputs_folder = file.path(main_folder,'ndii_new/postprocess_temporary')		# Folder where to put results of this processing
		temp_files_folder = file.path(main_folder,'normal_new/postprocess/temp')
		
		in_shape_admin_file = file.path(main_folder,'Analysis/Boundaries/com2011/com2011.shp')   # shape of admin areas - Lomb
		
		hans_mask_file = 'D:/Temp/PHL_Analysis/Ancillary/Masks/Hansen_Forest_Map/Hansen_GFC2013_treecover2000_20N_120E_GAIN-LOSS_clip_FORESTperc_SINU.tif' #Mask of forests - PHL
		mask_hansen = F	
		
		
#		in_shape_admin_file = file.path(main_folder,'Ancillary/Shapes/Admin_PHL.shp')   # shape of admin areas - PHL
		out_mod_grid_df_file = file.path(outputs_folder,'out_df_modgrid.RData')   # output rdata file used to store results
		dir.create(dirname(out_mod_grid_df_file), recursive = T)
		dir.create(outputs_folder, recursive = T)
		
# Extent of the "reference grid" used - tipically a subset of the MODIS image including the full extent
# of the "reference" map - the corners specified below should correspond to upper and left corners of
# the subset of the MODIS image to be considered (so, no "random" approximate coordinates)
		grid_folder = file.path(outputs_folder,'GRIDs')   #save the grid here		
#		grid_extent = data.frame(xmin = 13557688.37,ymin = 1188165.46, xmax = 13650350.93, ymax =  1280828.02)			#Grid - PHL # Reduce this extent otherwise you will run out of memory !
		
		grid_extent = data.frame(xmin = 612373.40+231.656358*150,ymin = 4961432.228+231.656358*150, xmax = 914453.2908-231.656358*600, ymax =  5087916.60000-231.656358*100)			#Grid - Lomb
		
		grid_resolutions = c(231.656358)		# Resolution of output grids
		grid_shapes <- c('Grid_MODIS_250')		 #this is the grid name
		dir.create(grid_folder)
		
		
# retrieve filenames of Phrice outputs to be used
		in_file = list.files(results_folder, glob2rx('*Full_Out*dat$'),full.names  = TRUE)
#		in_maxs_file = list.files(results_folder, glob2rx('*map_max*dat'),full.names  = TRUE)
#		in_mins_file = list.files(results_folder, glob2rx('*map_min*dat'),full.names  = TRUE)
#		in_ndetect_file = list.files(results_folder, glob2rx('*map_rice*dat'),full.names  = TRUE)
		
# Definition of names of "Intermediate" files required for processing
		out_ref_raster= file.path(main_folder,'Analysis/Reference','Reproj','Reference_Map_SIN.tif')
		dir.create(dirname(out_ref_raster))
		out_grid_rasters = file.path(temp_files_folder,paste(grid_shapes,'_raster_HR.tiff',sep = ''))
		out_rice_file = file.path(temp_files_folder,'ricefil_clipped.tiff' )
#		out_mins_file = file.path(temp_files_folder,'doys_min_map_clipped.tiff' )
#		out_maxs_file = file.path(temp_files_folder,'doys_max_map_clipped.tiff' )
#		out_nrice_file = file.path(temp_files_folder,'rice_map_clipped.tiff' )
		out_ref_hansen = file.path(main_folder,'Ancillary/Masks/Hansen_Forest_Map','Hansen_HR.tiff')
		
		grid_fullnames = paste(grid_folder,'/',grid_shapes,'.shp',sep = '')
		out_grid_df_files = file.path(outputs_folder,paste('out_df_',grid_shapes,'.RData', sep = ''))
		out_grid_shp_files = file.path(outputs_folder,paste('out_shp_',grid_shapes,'.shp', sep = ''))
		
		in_rast = raster(in_file)   # Get the proj4 of the input rasters
		in_proj = proj4string(in_rast)
		
	}}

#- ------------------------------------------------------------------------------- -#
#  Start the processing
#- ------------------------------------------------------------------------------- -#

#- ------------------------------------------------------------------------------- -#
# Create the regular grids (MODIS + 2,5,10,20 K) if needed  
#- ------------------------------------------------------------------------------- -#
{{
		
		for (gr_res in seq(along = grid_fullnames)) {
			
			
			if(file.exists(grid_fullnames[gr_res]) == FALSE) {
				LB_Create_pol_grid(in_file , grid_resolutions[gr_res],grid_folder, grid_shapes[gr_res],grid_extent)		
			}
			
		} #End Cycle on gr_res
	}}
#- ------------------------------------------------------------------------------- -#
# Reproject the reference raster map to MODIS projection
#- ------------------------------------------------------------------------------- -#
{{
		in_ref = raster(phrice_refmap_orig)
		in_ref_proj = proj4string(in_ref)
		if(file.exists(out_ref_raster) == FALSE) {
			ref_reproj = gdalwarp(phrice_refmap_orig,out_ref_raster, s_srs = in_ref_proj, t_srs = in_proj,
					r = "near",of = 'GTiff', overwrite = T, srcnodata = 128, dstnodata = 255,ot = 'Byte', output_Raster = TRUE)
			ext_ref_repr = extent(ref_reproj)
			res_ref_reproj = res(ref_reproj)
		} else {
			ref_reproj = raster(out_ref_raster)
			ext_ref_repr = extent(ref_reproj)
			res_ref_reproj = res(ref_reproj)
		}
	}}

# Start cyclying on grids --> On cycle one use the grid @ modis resolution to summarize results
# and compute results of "standard" accuracy analysis. On later  cycle ,compute results aggregated
#on hte various resolutions grids

for (cycle in seq(along = grid_shapes)) {
	
	
	if (cycle == 1) {   
#- ------------------------------------------------------------------------------- -#
#  # 1st cycle --> 
#-       1) Perform "tabulate areas" to retrieve the percentage area classified as rice 
#-				  in the refernce map for ech MODIS pixel
#-       2) Perform "tabulate areas" to retrieve the average forest fractional cover according to 
#-          Hansen map for each MODIS pixel
#-       3) Associate info on Administrative Areas for each MODIS pixel
#-			 4) Save results in out_mod_grid_df.RData
#-       5) Add computed info to the DBF of the mod_grid_250 shapefile 
#-	
#- ------------------------------------------------------------------------------- -#
		{{
				#- ------------------------------------------------------------------------------- -#
				# If not already existing, Rasterize polygons on the extent of the reference map, at a resolution 
				# equal to that of the reference map. For each pixel, values of this raster correspond to a
				# unique identifier of the cell of the  ERMES grid to which the pixel belongs
				#- ------------------------------------------------------------------------------- -#
				
				{{	
#						if (file.exists(out_grid_rasters[cycle]) == FALSE){
						dir.create(dirname(out_grid_rasters[cycle]))
						raster_grid = gdal_rasterize(grid_fullnames[cycle],out_grid_rasters[cycle],a = "id", output_Raster = T,
								te = c(ext_ref_repr@xmin, (ext_ref_repr@ymin+res_ref_reproj[1]),(ext_ref_repr@xmax-res_ref_reproj[1]), (ext_ref_repr@ymax)), ot = 'UInt32',tr = res_ref_reproj, tap = T, of = 'GTiff')
						gc()
#						}
					}}
				#- ------------------------------------------------------------------------------- -#
				# Compute the average forest cover for each pixel   
				#- ------------------------------------------------------------------------------- -#	
				{{ if (mask_hansen == T) {
							# Get the values of the "0000dummy" raster of cells codes
							raster_cells <- new("GDALReadOnlyDataset", out_grid_rasters[cycle])
							data_raster_cells = getRasterData(raster_cells)
							dim(data_raster_cells) = dim(data_raster_cells)[1]*dim(data_raster_cells)[2]
							GDAL.close(raster_cells)
							gc()
							
							# Resize Hansen Mask and increase resolution, then get the values
							
							if (file.exists (out_ref_hansen) == FALSE) {
								gdalwarp(hans_mask_file,out_ref_hansen, s_srs = in_proj, t_srs = in_proj,
										te = c(ext_ref_repr@xmin, (ext_ref_repr@ymin+res_ref_reproj[1]),(ext_ref_repr@xmax-res_ref_reproj[1]), (ext_ref_repr@ymax)),
										r = "near",of = 'GTiff', overwrite = T, srcnodata = 128, dstnodata = 255,ot = 'Byte', output_Raster = TRUE,tr = res_ref_reproj, tap = T)
							} 
							# Get the values of the Hansen Mask 
							hans_rast = new("GDALReadOnlyDataset",out_ref_hansen)
							hans_data =  getRasterData(hans_rast)
							dim(hans_data) = dim(hans_data)[1]*dim(hans_data)[2]
							GDAL.close(hans_rast)
							gc()
							
							# Create a Data.table with two columns. First column taken from the cell_codes raster - tells to which cell each pixel correspond
							# Second column taken from the forest cover map - tells which forest cover values correspond to each cell
							
							cells_dt = data.table(id=data_raster_cells,value = hans_data )
							setkey(cells_dt, id)  # Set a indexing key to increase speed of zonal statistics computation
							rm(data_raster_cells)
							rm(hans_data)
							gc()
							
# Compute the aggregated values for each cell of the ERMES grid - average, stdev, min, max and n° of non no-data pixels in each cell
							aggregate_hans_values = cells_dt[, list(average = as.numeric(mean(value, na.rm = T)),
											stdev = as.numeric(sd(value, na.rm = T)),
											n_cells = as.numeric(length(value))
									), by = key(cells_dt)]
							
							aggregate_hans_values = aggregate_hans_values[id != 0 ]		# Remove the values for areas outdide the MODIS grid
						}}
				}

				#- ------------------------------------------------------------------------------- -#
				# Retrieve necessary data from the phenorice output rasters 
				#- ------------------------------------------------------------------------------- -#
				{{
#						ndetect = raster(fullout_file, band = 1)
#						mindoy = raster(fullout_file, band = 4)
#						maxdoy = raster(fullout_file, band = 8)
#						
#						tempndetect_file = file.path(temp_files_folder, 'ndetect.tif')
#						tempmindoy_file = file.path(temp_files_folder,  'mindoy.tif')
#						tempmaxdoy_file = file.path(temp_files_folder,  'maxdoy.tif')
#						
						gdalwarp(in_file,out_rice_file, te = c(grid_extent$xmin,grid_extent$ymin,grid_extent$xmax,grid_extent$ymax), overwrite = T)
						
						#- Retrieve the values of the number of rice season
#						gdalwarp(nrice,out_nrice_file, te = c(grid_extent$xmin,grid_extent$ymin,grid_extent$xmax,grid_extent$ymax), overwrite = T)
						rice_rast = new("GDALReadOnlyDataset", out_rice_file)
						data_rice = getRasterData(rice_rast)
						
						data_nrice = data_rice[,,1]  ; dim(data_nrice) = dim(data_nrice)[1]*dim(data_nrice)[2]
						mins_q1 = data_rice[,,2] ; dim(mins_q1) = dim(mins_q1)[1]*dim(mins_q1)[2]
						mins_q2 = data_rice[,,3] ; dim(mins_q2) = dim(mins_q2)[1]*dim(mins_q2)[2]
						mins_q3 = data_rice[,,4] ; dim(mins_q3) = dim(mins_q3)[1]*dim(mins_q3)[2]
						mins_q4 = data_rice[,,5] ; dim(mins_q4) = dim(mins_q4)[1]*dim(mins_q4)[2]
						maxs_q1 = data_rice[,,6] ; dim(maxs_q1) = dim(maxs_q1)[1]*dim(maxs_q1)[2]
						maxs_q2 = data_rice[,,7] ; dim(maxs_q2) = dim(maxs_q2)[1]*dim(maxs_q2)[2]
						maxs_q3 = data_rice[,,8] ; dim(maxs_q3) = dim(maxs_q3)[1]*dim(maxs_q3)[2]
						maxs_q4 = data_rice[,,9] ; dim(maxs_q4) = dim(maxs_q4)[1]*dim(maxs_q4)[2]
						GDAL.close(rice_rast)
												
						# Get the cell_ids from the dbf of the grd shapefile and create a dataframe
						phrice_res_df = read.dbf(file.path(grid_folder,paste(grid_shapes[cycle],'.dbf', sep = '')))  
						phrice_res_df = data.frame(id = phrice_res_df$id)
						phrice_res_df$n_seasons = as.numeric(data_nrice)
						phrice_res_df$min_q1 = as.numeric(mins_q1)
						phrice_res_df$min_q2 = as.numeric(mins_q2)
						phrice_res_df$min_q3 = as.numeric(mins_q3)
						phrice_res_df$min_q4 = as.numeric(mins_q4)
						phrice_res_df$max_q1 = as.numeric(maxs_q1)
						phrice_res_df$max_q2 = as.numeric(maxs_q2)
						phrice_res_df$max_q3 = as.numeric(maxs_q3)
						phrice_res_df$max_q4 = as.numeric(maxs_q4)
						phrice_res_df [phrice_res_df == 0 ] = NA
						phrice_res_df$min_q1 [which(phrice_res_df$min_q1 >= 400 )] = NA
						phrice_res_df$min_q2 [which(phrice_res_df$min_q2  >= 400 )] = NA
						phrice_res_df$min_q3 [which(phrice_res_df$min_q3  >= 400 )] = NA
						phrice_res_df$min_q4 [which(phrice_res_df$min_q4  >= 400 )] = NA
						phrice_res_df$max_q1 [which(phrice_res_df$max_q1 >= 400 )] = NA
						phrice_res_df$max_q2 [which(phrice_res_df$max_q2  >= 400 )] = NA
						phrice_res_df$max_q3 [which(phrice_res_df$max_q3  >= 400 )] = NA
						phrice_res_df$max_q4 [which(phrice_res_df$max_q4  >= 400 )] = NA
						phrice_res_df$length_q1 = 	(phrice_res_df$max_q1+365)-(phrice_res_df$min_q1+365)
						phrice_res_df$length_q2 = 	(phrice_res_df$max_q2+365)-(phrice_res_df$min_q2+365)
						phrice_res_df$length_q3 = 	(phrice_res_df$max_q3+365)-(phrice_res_df$min_q3+365)
						phrice_res_df$length_q4 = 	(phrice_res_df$max_q4+365)-(phrice_res_df$min_q4+365)
						
						rm(mins_q1,mins_q2,mins_q3,mins_q4)
						rm(maxs_q1,maxs_q2,maxs_q3,maxs_q4)
						rm(data_maxs,data_mins)
						gc()
					}}		
				
#- ------------------------------------------------------------------------------- -#
# Get all the retrieved datasets and put them in a single data frame and save outputs
#- ------------------------------------------------------------------------------- -#
				
				{{
						# Get data for forest cover, number of cells, rice fractional cover
						if (mask_hansen == T) {				
							
							out_df_modgrid = data.frame(id = aggregate_reference_values$id, ncells = aggregate_reference_values$n_cells,
									ref_rice_fc = aggregate_reference_values$ref_rice_fc, hans_forcov = aggregate_hans_values$average)
						} else {
							out_df_modgrid = data.frame(id = aggregate_reference_values$id, ncells = aggregate_reference_values$n_cells,
									ref_rice_fc = aggregate_reference_values$ref_rice_fc)
						}
						
						
						# Join with the data retrieved from the rasters
#						browser()
						out_df_modgrid = join(out_df_modgrid,phrice_res_df, by = 'id', type = 'left' )
						is.na(out_df_modgrid) <- do.call(cbind,lapply(out_df_modgrid, is.infinite))  # Convert Infinite to NA
						is.na(out_df_modgrid) <- do.call(cbind,lapply(out_df_modgrid, is.nan))
						# Compute the ricearea form the percentage of rice area+ncells+resolution of reference map + compute
						# total non-nodata area of the pixel 
							
						#Retrieve and add info regarding administrative units
						in_shape_administrative = readOGR(dirname(in_shape_admin_file),file_path_sans_ext(basename(in_shape_admin_file)))  #TBC
						in_shape_admin_reproj = spTransform(in_shape_administrative, CRS(in_proj))
						centroids = getSpPPolygonsLabptSlots(readOGR(grid_folder,grid_shapes[cycle]))	# get coordinates of centroids of each 250m cell
						centroids = SpatialPoints(centroids,proj4string =  CRS(in_proj)) # convert to spatialpoints
						
						# do an "intersect" with the admin_areas shapes --> returns a df with the id of the cell, plus the "administrative units" info
						# extracted from the polygin in which the centroid of the cell falls
						admin_cells = over(centroids,in_shape_admin_reproj)
						
						###### NEED TO BE CHANGED IF THE STRUCTURE OF THE ADMIN DATA IS DIFFERENT !!!! ####
						#admin_cells = admin_cells [,c("NAME_1","NAME_2")]
						
						admin_cells = admin_cells [,c("COD_REG","COD_PRO")] 
						admin_cells$id = phrice_res_df$id
						names(admin_cells)[1:2] = c('Region','Province')
						out_df_modgrid = join(out_df_modgrid,admin_cells, by = 'id', type = 'left' )  # join to total df the info on Region and Province
						
						# SAve the outputs
						save(out_df_modgrid, file = out_mod_grid_df_file)		# Save the RData table
						out_shp_dbf = data.frame(id = phrice_res_df$id)			# Retrieve cell ids from the "empty grid
						out_shp_dbf = join(out_shp_dbf,out_df_modgrid, by = 'id', type = 'left' )  # join the info using id as base
						write.dbf(out_shp_dbf, file.path(grid_folder,paste(grid_shapes[cycle],'.dbf', sep = ''))) # overwrite the dbf table of the MOD 250m grid shapefile
						gc()
					}}	
				
			}}
	
} #End for on Cycle
lbusett/phenoriceR documentation built on May 18, 2019, 9:17 p.m.