MODISSummaries <-
function(LoadDat, FileSep = NULL, Dir = ".", Product, Bands, ValidRange, NoDataFill, ScaleFactor, StartDate = FALSE,
QualityScreen = FALSE, QualityBand = NULL, QualityThreshold = NULL, Mean = TRUE, SD = TRUE, Min = TRUE, Max = TRUE,
Yield = FALSE, Interpolate = FALSE, InterpolateN = NULL, DiagnosticPlot = FALSE)
{
# DEFINE
NUM_METADATA_COLS <- 10
if(Dir == '.') cat('MODIS data files from ', getwd(),
' will be summarised.\nSummary files will be written to the same directory.\n', sep = '')
if(Dir != '.') cat('MODIS data files from ', Dir,
' will be summarised.\nSummary files will be written to the same directory.\n', sep = '')
# Load input time-series data file; external data file, or an R object.
if(!is.object(LoadDat) & !is.character(LoadDat)) stop("LoadDat must be an object in R or a file path character string.")
if(is.object(LoadDat)) details <- data.frame(LoadDat)
if(is.character(LoadDat)){
if(!file.exists(LoadDat)) stop("Character string input for LoadDat does not resemble an existing file path.")
if(is.null(FileSep)) stop("To load a file as input, you must also specify its delimiter (FileSep).")
details <- read.delim(LoadDat, sep = FileSep)
}
#####
# Check lat and long data frame columns are named "lat" and "long" as necessary.
if(!any(names(details) == "lat") | !any(names(details) == "long")){
stop("Could not find columns for latitude and/or longitude in your data set. Must be named 'lat' and 'long'.")
}
if(!file.exists(Dir)) stop("Character string input for Dir argument does not resemble an existing file path.")
# Check valid inputs for the quality screening process.
if(!is.logical(QualityScreen)) stop("QualityScreen argument should be a logical input. See help ?MODISSummaries.")
if(QualityScreen){
if(is.null(QualityBand) | is.null(QualityThreshold)) stop("QualityBand and QualityThreshold not specified.")
}
product.bands <- try(GetBands(Product), silent = TRUE)
if(class(product.bands) != "try-error"){
# Check Band and QualityBand belong to Product.
if(!all(Bands %in% product.bands)) stop(paste("Band input does not match with", Product, "product.", sep = " "))
if(QualityScreen){
if(Product == "MCD43A4"){
if(QualityBand != "BRDF_Albedo_Band_Quality") stop("QualityBand input is not QA data for MCD43A4 product.")
} else {
if(!any(product.bands == QualityBand)) stop(paste("QualityBand is not QA data for", Product, "product.", sep = " "))
}
}
} else {
cat("MODIS server temporarily overloaded so user input checking skipped.")
}
# NoDataFill should be one integer.
if(length(NoDataFill) != 1) stop("NoDataFill input must be one integer.")
if(!is.numeric(NoDataFill)) stop("NoDataFill should be numeric class. One integer.")
if(abs(NoDataFill - round(NoDataFill)) > .Machine$double.eps^0.5) stop("NoDataFill input must be one integer.")
# ValidRange should be a numeric vector, length 2.
if(length(ValidRange) != 2) stop("ValidRange input must be a numeric vector - an upper and lower bound.")
if(!is.numeric(ValidRange)) stop("ValidRange should be numeric class.")
# ScaleFactor should be numeric, length 1.
if(length(ScaleFactor) != 1) stop("ScaleFactor input must be one numeric element.")
if(!is.numeric(ScaleFactor)) stop("ScaleFactor should be numeric class.")
# Year or posixt date format?
if(all(is.na(details$end.date))) stop("Time-series end.date variable in LoadDat is empty.")
Year <- FALSE
POSIXt <- FALSE
posix.compatible <- try(as.POSIXlt(details$end.date), silent=TRUE)
if(any(class(details$end.date) == "POSIXt") | all(class(posix.compatible) != "try-error")) POSIXt <- TRUE
if(all(is.numeric(details$end.date) & nchar(as.character(details$end.date)) == 4) &
any(class(posix.compatible) == "try-error")) Year <- TRUE
if(!Year & !POSIXt) stop("Date informDate information in LoadDat is not recognised as years or as POSIXt format.")
if(Year & POSIXt) stop("Date information in LoadDat is recognised as both year and POSIXt formats.")
if(StartDate){
if(!any(names(details) == "start.date") | all(is.na(details$start.date))){
stop("StartDate == TRUE, but no start.date field found in LoadDat.")
}
}
#####
# Date and time stamp
date <- as.POSIXlt(Sys.time())
file.date <- paste(as.Date(date),
paste(paste0("h",date$hour), paste0("m",date$min), paste0("s",round(date$sec,digits=0)), sep = "-"),
sep = "_")
# Get a list of all downloaded subset (.asc) files in the data directory.
file.list <- list.files(path = Dir, pattern = paste(Product, ".*asc$", sep = ""))
if(length(file.list) == 0) stop("Found no MODIS data files in Dir that match the request.")
size.test <- sapply(file.path(Dir, file.list), function(x) ncol(read.csv(x)[1, ]) - NUM_METADATA_COLS)
if(!all(size.test == size.test[1])) stop("The number of pixels (Size) in subsets identified are not all the same.")
# size.test checked all subsets are compatible for processing to one summary file. Can now just use size.test[1]
num.pixels <- unname(size.test[1])
# Extract IDs for ASCII files so they can be included in summary output; ncol = length(file.list), nrow = size.test.
where.id <- regexpr("___", file.list)
id <- matrix(unname(mapply(function(x, y, z) rep(substr(x, 1, y - 1), z), x = file.list, y = where.id, z = size.test,
SIMPLIFY = "array")), nrow = size.test, ncol = length(file.list))
# Time-series analysis for each time-series (ASCII file) consecutively.
band.data.site <- lapply((size.test * length(Bands)), function(x) matrix(nrow = x, ncol = 13))
for(counter in 1:length(file.list)){
cat("Processing file ", counter, " of ", length(file.list), "...\n", sep = "")
##### Load selected .asc file into a data frame, name columns and tell user what's being processed.
ds <- read.csv(file.path(Dir, file.list[counter]), header = FALSE, as.is = TRUE)
names(ds) <- c("nrow", "ncol", "xll", "yll", "pixelsize", "row.id", "product.code", "MODIS.acq.date",
"where", "MODIS.proc.date", 1:(ncol(ds) - NUM_METADATA_COLS))
# Extract year and day from the metadata and make POSIXlt dates (YYYY-MM-DD), ready for time-series analysis.
year <- as.numeric(substr(ds$MODIS.acq.date, 2, 5))
day <- as.numeric(substr(ds$MODIS.acq.date, 6, 8))
date <- strptime(paste(year, "-", day, sep = ""), "%Y-%j")
ds <- cbind(ds[,1:NUM_METADATA_COLS], date, ds[,(NUM_METADATA_COLS+1):ncol(ds)])
w.ds.dat <- which(names(ds) == "date") + 1
#####
##### Section that uses the files metadata strings [ ,1:5] for each time-series to extract necessary information.
wherelong <- regexpr("Lon", ds$where[1])
whereSamp <- regexpr("Samp", ds$where[1])
lat <- as.numeric(substr(ds$where[1], 4, wherelong - 1))
long <- as.numeric(substr(ds$where[1], wherelong + 3, whereSamp - 1))
rowIdBands <- Reduce(rbind, strsplit(ds$row.id, "[.]"))
rowIdBands <- rowIdBands[ ,ncol(rowIdBands)]
# Check that all bands listed are in the ASCII files.
if(!all(Bands %in% rowIdBands)) stop("Not all Bands are represented in data file.")
# Identify which rows in ds correspond to the quality data and put into a matrix.
if(QualityScreen){
ifelse(QualityBand %in% rowIdBands,
which.QA <- which(QualityBand == rowIdBands),
stop("Cannot find quality data in LoadDat. Download quality data with band data in MODISSubsets."))
QA.time.series <- as.matrix(ds[which.QA,w.ds.dat:ncol(ds)], nrow = length(which.QA), ncol = length(w.ds.dat:ncol(ds)))
}
#####
for(bands in 1:length(Bands)){
# Find rows in ds that correspond to Bands[bands] data and put into a matrix.
ifelse(Bands[bands] %in% rowIdBands,
which.band <- which(Bands[bands] == rowIdBands),
stop("Cannot find band data in LoadDat. Make sure ASCII files in the directory are from MODISSubsets."))
band.time.series <- as.matrix(ds[which.band,w.ds.dat:ncol(ds)],
nrow = length(which.band), ncol = length(w.ds.dat:ncol(ds)))
##### Screen pixels in band.time.series: any NoDataFill or pixels < QualityThreshold, will be replaced with NA.
ifelse(QualityScreen,
band.time.series <- QualityCheck(Data = band.time.series, QualityScores = QA.time.series, Band = Bands[bands],
NoDataFill = NoDataFill, QualityBand = QualityBand, Product = Product,
QualityThreshold = QualityThreshold),
band.time.series <- matrix(ifelse(band.time.series != NoDataFill, band.time.series, NA),
nrow = length(which.band)))
# Final check, that band values all fall within the ValidRange (as defined for given MODIS product band).
if(any(!(band.time.series >= ValidRange[1] && band.time.series <= ValidRange[2]), na.rm = TRUE)){
stop("Some values fall outside the valid range, after no fill values should have been removed.")
}
####
# Initialise objects for various summaries.
mean.band <- rep(NA, num.pixels)
sd.band <- rep(NA, num.pixels)
band.yield <- rep(NA, num.pixels)
nofill <- rep(NA, num.pixels)
poorquality <- rep(NA, num.pixels)
band.min <- rep(NA, num.pixels)
band.max <- rep(NA, num.pixels)
if(DiagnosticPlot & num.pixels != 1){
cat("Can not provide diagnostic plots for subsets larger than a single pixel")
DiagnosticPlot <- FALSE
}
# Run time-series analysis for the ith
for(i in 1:ncol(band.time.series)){
# Minimum and maximum band values observed.
minobsband <- min(as.numeric(band.time.series[ ,i]) * ScaleFactor, na.rm = TRUE)
maxobsband <- max(as.numeric(band.time.series[ ,i]) * ScaleFactor, na.rm = TRUE)
# Assess quality of data at this time-step, to decide how to proceed with analysis.
data.quality <- ifelse(QualityScreen, sum(!is.na(band.time.series[ ,i])), 2)
if(data.quality >= 2){
# Linearly interpolate between screened data points, for each pixel, over time (default is daily).
if(Interpolate){
if(is.null(InterpolateN)){
#N <- max(ds$date[!is.na(band.time.series[ ,i])]) - min(ds$date[!is.na(band.time.series[ ,i])])
InterpolateN <- round(0.9 * as.numeric(length(band.time.series)))
}
sout <- approx(x = 1:nrow(band.time.series), y = as.numeric(band.time.series[ ,i]) * ScaleFactor,
method = "linear", n = InterpolateN)
}
# Carry out all the relevant summary analyses, set by options in the function call.
# (((365*length(years))-16)*365) = average annual yield (i.e. work out daily yield * 365).
if(Yield) band.yield[i] <- (sum(sout$y) - minobsband * length(sout$x)) / length(sout$x)
if(Mean){
ifelse(Interpolate,
mean.band[i] <- mean(sout$y),
mean.band[i] <- mean(as.numeric(band.time.series[ ,i]) * ScaleFactor, na.rm = TRUE))
}
if(SD){
ifelse(Interpolate,
sd.band[i] <- sd(sout$y),
sd.band[i] <- sd(as.numeric(band.time.series[ ,i]) * ScaleFactor, na.rm = TRUE))
}
}
if(data.quality == 1){
warning("Only single data point that passed the quality screen: cannot summarise", immediate. = TRUE)
if(Mean) mean.band[i] <- mean(as.numeric(band.time.series[ ,i]) * ScaleFactor, na.rm = TRUE)
}
if(data.quality == 0) warning("No reliable data for this pixel", immediate. = TRUE)
# Complete final optional summaries, irrespective of data quality.
if(Min) band.min[i] <- minobsband
if(Max) band.max[i] <- maxobsband
nofill[i] <- paste(round((sum(ds[ ,i + (w.ds.dat - 1)] == NoDataFill) / length(band.time.series[ ,i])) * 100, 2),
"% (", sum(ds[ ,i + (w.ds.dat - 1)] == NoDataFill), "/", length(band.time.series[ ,i]), ")",
sep = "")
ifelse(QualityScreen,
poorquality[i] <-
paste(round((sum(QA.time.series[ ,i] > QualityThreshold) / length(QA.time.series[ ,i])) * 100, 2),
"% (", sum(QA.time.series[ ,i] > QualityThreshold), "/", length(QA.time.series[ ,i]), ")", sep = ""),
poorquality[i] <- NA)
} # End of loop for time-series summary analysis for each pixel.
# Compile time-series information and relevant summaries data into a final output listed by-sites (.asc files).
band.data.site[[counter]][(((bands - 1) * num.pixels) + 1):(num.pixels * bands), ] <-
matrix(data = c(ID = id[ ,counter],
lat = rep(lat, num.pixels),
long = rep(long, num.pixels),
start.date = rep(as.character(min(ds$date)), num.pixels),
end.date = rep(as.character(max(ds$date)), num.pixels),
data.band = rep(Bands[bands], num.pixels),
min.band = band.min,
max.band = band.max,
mean.band = mean.band,
sd.band = sd.band,
band.yield = band.yield,
no.fill.data = nofill,
poor.quality.data = poorquality),
nrow = num.pixels, ncol = 13)
if(DiagnosticPlot){
directory <- file.path(Dir, "DiagnosticPlots")
if(file.exists(directory) == FALSE) dir.create(directory)
filename <- file.path(directory, paste(id[counter], Product,
Bands[bands], file.date, ".pdf", sep = "_"))
if(data.quality == 1 | data.quality == 0){
cat("Too few data points for diagnostic plot for this site\n")
} else {
pdf(filename)
SiteId <- max(paste(lat, long, year))
plot(band.time.series[,i] * ScaleFactor, pch = 19, main = id[counter], xlab = "Timestep", ylab = Bands[bands], ylim = c(-0.1, 1))
if(Interpolate) lines(sout, col = "red")
if(Mean) abline(a = mean.band[i], b=0, lty = 2, col = "red")
if(Min) abline(a = band.min[i], b=0, lty = 2)
if(Max) abline(a = band.max[i], b=0, lty = 2)
dev.off()
}
}
} # End of loop for Bands.
} # End of loop for each ASCII file.
band.data.site <- as.data.frame(do.call("rbind", band.data.site), stringsAsFactors = FALSE)
names(band.data.site) <- c("ID", "lat", "long", "start.date", "end.date", "data.band", "min.band", "max.band",
"mean.band", "sd.band", "band.yield", "no.fill.data", "poor.quality.data")
### Create SubsetID variable as created in MODISSubsets for correct matching.
if(StartDate){
lat.long <- details[!is.na(details$lat) | !is.na(details$long) | !is.na(details$end.date) | !is.na(details$start.date), ]
lat.long <- lat.long[!duplicated(data.frame(lat.long$lat, lat.long$long, lat.long$end.date, lat.long$start.date)), ]
} else if(!StartDate){
lat.long <- details[!is.na(details$lat) | !is.na(details$long) | !is.na(details$end.date), ]
lat.long <- lat.long[!duplicated(data.frame(lat.long$lat, lat.long$long, lat.long$end.date)), ]
}
ID.check <- ifelse(any(names(details) == "ID"), TRUE, FALSE)
n.unique <- FALSE
if(ID.check){
# Check if ID variable can identify unique time series
n.unique <- length(unique(lat.long$ID)) == nrow(lat.long)
if(n.unique) names(lat.long)[names(lat.long) == "ID"] <- "SubsetID"
} else if(!ID.check | !n.unique){
# Create end.date and start.date
Year <- FALSE
POSIXt <- FALSE
posix.compatible <- try(as.POSIXlt(lat.long$end.date), silent = TRUE)
if(any(class(lat.long$end.date) == "POSIXt") | all(class(posix.compatible) != "try-error")) POSIXt <- TRUE
if(all(is.numeric(lat.long$end.date) & nchar(as.character(lat.long$end.date)) == 4) &
any(class(posix.compatible) == "try-error")) Year <- TRUE
if(!Year & !POSIXt) stop("Date information in LoadDat is not recognised as years or as POSIXt format.")
if(Year & POSIXt) stop("Date information in LoadDat is recognised as both year and POSIXt formats.")
if(Year){
end.date <- strptime(paste(lat.long$end.date, "-12-31", sep = ""), "%Y-%m-%d")
if(StartDate){
start.year.fail <- any(!is.numeric(lat.long$start.date) | nchar(lat.long$start.date) != 4)
if(start.year.fail) stop("end.date identified as year dates, but start.date does not match.")
start.date <- strptime(paste(lat.long$start.date, "-01-01", sep = ""), "%Y-%m-%d")
}
} else if(POSIXt){
end.date <- strptime(lat.long$end.date, "%Y-%m-%d")
if(StartDate){
start.posix.fail <- any(class(try(as.POSIXlt(lat.long$end.date), silent = TRUE)) == "try-error")
if(start.posix.fail) stop("end.date identified as POSIXt dates, but start.date does not match.")
start.date <- strptime(lat.long$start.date, "%Y-%m-%d")
}
}
if(!StartDate){
date.ids <- mapply(function(x,y,z) unique(band.data.site$ID[band.data.site$lat == x &
band.data.site$long == y &
grepl(z, band.data.site$ID)]),
x = lat.long$lat, y = lat.long$long, z = as.character(end.date))
if(!all(sapply(date.ids, length) == 1)){
stop("Couldn't match summarised data back with original dataset.\n",
"Make sure LoadDat is exact same dataset used to download MODIS data being summarised.")
}
start.date <- substr(date.ids, regexpr("Start", date.ids)+5, regexpr("End", date.ids)-1)
}
lat.long <- data.frame(SubsetID = paste("Lat", sprintf("%.5f", lat.long$lat), "Lon", sprintf("%.5f", lat.long$long),
"Start", start.date, "End", end.date, sep = ""),
lat.long, stringsAsFactors = FALSE)
}
###
res <- data.frame(cbind(details, matrix(NA, nrow = nrow(details), ncol = 1+(num.pixels * length(Bands)))))
names(res) <-
c(names(details), "SubsetID",
paste(rep(Bands, each = num.pixels), "_pixel", rep(1:num.pixels, times = length(Bands)), sep = ""))
for(i in 1:length(lat.long$SubsetID)){
res$SubsetID[sprintf("%.5f", res$lat) %in% sprintf("%.5f", as.numeric(lat.long$lat[i])) &
sprintf("%.5f", res$long) %in% sprintf("%.5f", as.numeric(lat.long$long[i])) &
res$end.date %in% lat.long$end.date[i]] <- lat.long$SubsetID[i]
res[res$SubsetID %in% lat.long$SubsetID[i],(which(names(res) == "SubsetID")+1):ncol(res)] <-
matrix(band.data.site$mean.band[band.data.site$ID == lat.long$SubsetID[i]],
nrow = nrow(as.matrix(res[res$SubsetID %in% lat.long$SubsetID[i],(which(names(res) == "SubsetID")+1):ncol(res)])),
ncol = ncol(as.matrix(res[res$SubsetID %in% lat.long$SubsetID[i],(which(names(res) == "SubsetID")+1):ncol(res)])),
byrow = TRUE)
}
#####
# Write output summary file by appending summary data from all files, producing one file of summary stats output.
write.table(band.data.site, file = file.path(Dir, paste("MODIS_Summary_", Product, "_", file.date, ".csv", sep = "")),
sep = ",", row.names = FALSE)
# Write the final appended dataset to a csv file, ready for use, in Dir.
write.table(res, file = file.path(Dir, paste("MODIS_Data_", Product, "_", file.date, ".csv", sep = "")),
sep = ",", col.names = TRUE, row.names = FALSE)
#####
cat("Done! Check the 'MODIS Summary' and 'MODIS Data' output files.\n")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.