R/l2_cops_gen.R

Defines functions station_interval l2_cops_gen

#' @export
#' @import dplyr
#' @import lubridate
#' @import stringr
#'

#TO DO : Create a report of coherence between Casts in L1 and Synthesis

#Function to generate the L2 COPS architecture from L1 and a log table (ASCII)

#library(data.table)
#library(dplyr)
#library(stringr)
#library(lubridate)


#project <- "/home/raphael/TEST"
#setwd(project)
l2_cops_gen <- function(project){
	# Generate the L2 COPS folder structure of a project from its L1 COPS data
	# and the data_synthesis log table.
	#
	# project : path to the project root; must hold "L1/COPS" and exactly one
	#           file whose name matches "data_synthesis" or "Data_Synthesis".
	#
	# Columns read from the synthesis table: Date, Time, every column whose name
	# contains "COPS" (station flag), ID (optional, integer), Boat (optional)
	# and StationID (required only when several boats are present).
	#
	# Side effects: creates one L2 folder per COPS station, writes the
	# "directories.for.cops.dat" files, copies the matching GPS .csv file and
	# the URC casts whose time stamp falls inside the station time interval.
	#
	# Returns the logical vector given by file.copy() for the matched casts.
	# Stops when the project check fails, when L2 already exists, or when the
	# synthesis table cannot be used.

	# set L1 and L2 absolute path
	L1 <- file.path(project, "L1", "COPS")
	L2 <- file.path(project, "L2")

	# Check project before doing any manipulation, code must be protective
	CheckList <- check_project(project, L1, L2, set = "COPS")

	if (CheckList["Proot"][[1]] == FALSE) {
		stop("project path is not set at a project root folder")
	}
	if (CheckList["L2exists"][[1]] == TRUE) {
		stop("L2 structure for COPS is already present in ", project, ".\n",
			"You better be sure of what you are doing ... take care of this by yourself !")
	}

	# Read data_synthesis file, reference Master file for database organisation
	SyntheFile <- list.files(path = project, pattern = "data_synthesis|Data_Synthesis", full.names = TRUE)
	if (length(SyntheFile) != 1) {
		stop("expected exactly one data_synthesis file in ", project, ", found ", length(SyntheFile))
	}
	Synthesis <- data.table::fread(SyntheFile, data.table = FALSE)

	Absent <- setdiff(c("Date", "Time"), names(Synthesis))
	if (length(Absent) > 0) {
		stop("data_synthesis file lacks required column(s): ", paste(Absent, collapse = ", "))
	}

	# Parsed once for every later use; truncated = 1 lets times written without
	# seconds ("HH:MM") parse as well as full "HH:MM:SS" ones
	Synthesis$DateTime <- ymd_hms(paste(Synthesis$Date, Synthesis$Time, sep = "T"), truncated = 1)

	# Check if ID column is present and numeric, then pad it (for Rstudio files sorting)
	if (!"ID" %in% names(Synthesis)) {
		warning("No ID column is present, one is generated by ascending DateTime row number.\n",
			"A copy of the data_synthesis file is made, the generated ID is not written back to disk")
		Synthesis <- Synthesis[order(Synthesis$DateTime), , drop = FALSE]
		# Padded like a user supplied ID so station and cast paths agree
		Synthesis$ID <- str_pad(seq_len(nrow(Synthesis)), 3, pad = "0")

		file.copy(SyntheFile, sub(".csv", "(copy).csv", SyntheFile, fixed = TRUE))

	} else if (is.integer(Synthesis$ID)) {
		Synthesis$ID <- str_pad(Synthesis$ID, 3, pad = "0")

	} else {
		stop("ID column is present but is not of type integer !")
	}

	# Merge every COPS column into one logical flag, whatever their number and
	# whether they were read as logical or as "T"/"F" text
	CopsCols <- grep("COPS", names(Synthesis), value = TRUE)
	if (length(CopsCols) == 0) {
		stop("no COPS column found in the data_synthesis file")
	}
	Synthesis$COPS <- Reduce(`|`, lapply(Synthesis[CopsCols], l2_cops_as_flag))

	# Time search is done only for TRUE COPS stations
	CopsTable <- Synthesis[Synthesis$COPS, , drop = FALSE]
	if (nrow(CopsTable) == 0) {
		stop("no station is flagged for COPS in the data_synthesis file")
	}
	if (anyNA(CopsTable$DateTime)) {
		stop("Date/Time could not be parsed for COPS station ID(s): ",
			paste(CopsTable$ID[is.na(CopsTable$DateTime)], collapse = ", "))
	}

	# Check if multiple boats were present
	MultiBoat <- "Boat" %in% names(Synthesis) && length(unique(Synthesis[["Boat"]])) > 1

	# NOTE(review): the multi-boat layout names its folders from a StationID
	# column; nothing in this file creates it, so it has to come from the
	# synthesis table - confirm the intended source
	if (MultiBoat && !"StationID" %in% names(CopsTable)) {
		stop("several boats are present but the data_synthesis file has no StationID column")
	}

	# Associate GPS file by station
	GPSpath <- file.path(L1, list.files(L1, pattern = "GPS", recursive = TRUE))
	GPSTable <- data.frame(
		Path = GPSpath,
		Date = ymd(str_extract(GPSpath, "[:digit:]{8}")),
		Boat = str_extract(GPSpath, "(?<=/)[:alpha:]+(?=_[:digit:]{8})"),
		stringsAsFactors = FALSE
	)
	CopsTable$GPS <- l2_cops_match_gps(
		lubridate::date(CopsTable$DateTime), GPSTable,
		boats = if (MultiBoat) CopsTable$Boat else NULL
	)
	NoGPS <- CopsTable$ID[is.na(CopsTable$GPS)]
	if (length(NoGPS) > 0) {
		message(paste0(NoGPS, " no .csv GPS file", collapse = "\n"))
	}

	# Create L1Table of all URC casts and extract relevant information for association
	L1Table <- l2_cops_list_casts(L1)

	# Create column of time interval (by acquisition platform when several boats
	# were used), arrange by DateTime ensures proper time sorting
	if (MultiBoat) {
		CopsTable <- CopsTable %>% group_by(Boat) %>% arrange(DateTime, .by_group = TRUE) %>%
			mutate(start = int_start(station_interval(DateTime)),
				  end = int_end(station_interval(DateTime))) %>%
			ungroup()
	} else {
		CopsTable <- CopsTable %>% arrange(DateTime) %>%
			mutate(start = int_start(station_interval(DateTime)),
				  end = int_end(station_interval(DateTime)))
	}
	# Two steps because class "interval" is destroyed by the row binding done in grouped operations
	CopsTable <- CopsTable %>% mutate(inters = interval(start, end))

	# Create L2 structure
	if (MultiBoat) {
		L2COPS <- file.path(L2,
			paste0(format(lubridate::date(CopsTable$DateTime), "%Y%m%d"), "_Station", CopsTable$StationID),
			paste0("COPS_", CopsTable$Boat))
	} else {
		L2COPS <- file.path(L2, CopsTable$ID, "COPS")
	}
	CopsTable$L2path <- L2COPS

	for (StationDir in unique(L2COPS)) {
		dir.create(StationDir, recursive = TRUE)
		write(StationDir, file = file.path(StationDir, "directories.for.cops.dat"))
	}
	write(unique(L2COPS), file = file.path(L2, "directories.for.cops.dat"))

	# Copy GPS file in each station, stations without GPS file are skipped
	HasGPS <- !is.na(CopsTable$GPS)
	file.copy(CopsTable$GPS[HasGPS],
		file.path(CopsTable$L2path[HasGPS], basename(CopsTable$GPS[HasGPS])))

	# Associate cast with station based on a time interval (and boat when relevant)
	Owner <- l2_cops_match_station(
		L1Table$DateTime, CopsTable$inters,
		cast_boat = if (MultiBoat) L1Table$Boat else NULL,
		station_boat = if (MultiBoat) CopsTable$Boat else NULL
	)
	Matched <- !is.na(Owner)
	if (any(!Matched)) {
		message(sum(!Matched), " cast(s) could not be associated with a COPS station")
	}

	# Each matched cast goes in the folder of its own station, so the copy
	# cannot target a folder that was never created
	file.copy(L1Table$L1path[Matched],
		file.path(CopsTable$L2path[Owner[Matched]], L1Table$Name[Matched]),
		overwrite = FALSE)
}

# Turn a station flag column into a plain logical vector: TRUE for logical TRUE
# or for the text "T"/"TRUE", FALSE otherwise (NA included)
l2_cops_as_flag <- function(x){
	if (is.logical(x)) {
		return(!is.na(x) & x)
	}
	toupper(trimws(as.character(x))) %in% c("T", "TRUE")
}

# For each station date, give the path of the first .csv GPS file of the same
# date (the one of the same boat is preferred when boats is given), NA if none
l2_cops_match_gps <- function(dates, gps, boats = NULL){
	is_csv <- grepl("\\.csv$", gps$Path)
	vapply(seq_along(dates), function(i){
		same_day <- which(is_csv & gps$Date == dates[i])
		if (!is.null(boats)) {
			same_boat <- same_day[which(gps$Boat[same_day] == boats[i])]
			if (length(same_boat) > 0) {
				same_day <- same_boat
			}
		}
		if (length(same_day) == 0) NA_character_ else gps$Path[same_day[1]]
	}, character(1))
}

# List the URC casts found under L1 with the boat, time stamp and cast number
# extracted from their relative path
l2_cops_list_casts <- function(L1){
	Rel <- list.files(L1, pattern = "URC", recursive = TRUE)
	# File names carry a two digit year, "20" is prepended to get a full date
	Day <- as.Date(sub("^", "20", str_extract(Rel, "(?<=CAST_[:digit:]{3}_)[:digit:]{6}")), format = "%Y%m%d")
	# "hhmmss" becomes "hh:mm:ss"
	Clock <- gsub("(\\d{2})(?=\\d{2})", "\\1:", str_extract(Rel, "[:digit:]{6}(?=_URC)"), perl = TRUE)
	data.frame(
		L1path = file.path(L1, Rel),
		Name = basename(Rel),
		Boat = str_extract(Rel, "[:alpha:]+(?=_)"),
		DateTime = ymd_hms(paste(Day, Clock, sep = "T")),
		Cast = str_extract(Rel, "(?<=CAST_)[:digit:]{3}"),
		stringsAsFactors = FALSE
	)
}

# For each cast time, give the row index of the first station whose interval
# contains it (restricted to the same boat when boats are given), NA if none
l2_cops_match_station <- function(cast_time, inters, cast_boat = NULL, station_boat = NULL){
	vapply(seq_along(cast_time), function(i){
		hit <- cast_time[i] %within% inters
		if (!is.null(cast_boat)) {
			hit <- hit & cast_boat[i] == station_boat
		}
		idx <- which(hit)
		if (length(idx) == 0) NA_integer_ else idx[1]
	}, integer(1))
}

# Fallback function building one interval per station time: the boundaries are
# set midway (middle) between consecutive station times, the first and the last
# boundaries being set 1800 seconds away from the first and last station time
station_interval <- function(x){
	n <- length(x)
	half_gap <- int_length(int_diff(x))/2
	midpoints <- half_gap + x[-n]
	lower <- x[1] - 1800
	upper <- x[n] + 1800
	int_diff(c(lower, midpoints, upper))
}
raphidoc/lighthouse documentation built on June 13, 2022, 10:06 a.m.