# lumpR/prof_class.R
# Copyright (C) 2014-2018 Tobias Pilz, Till Francke
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#' Classification of mean catenas
#'
#' Classifies mean catenas derived from \code{\link[lumpR]{area2catena}} into \emph{Landscape
#' Units} and \emph{Terrain Components}.
#'
#' @param catena_file Name of file containing mean catena information derived from
#' \code{\link[lumpR]{area2catena}}.
#' @param catena_head_file Name of file containing meta-information for classification
#' derived from \code{\link[lumpR]{area2catena}} and adjusted manually (see \code{Notes}).
#' @param svc_column Field name in \code{catena_head_file} that holds the SVC
#' information used for generating \code{tccontainssvcoutfile}. Default: 'svc'.
#' @param dir_out Character string specifying output directory (will be created;
#' nothing will be overwritten).
#' @param luoutfile Output: Name of file containing the average properties of
#' \emph{Landscape Units}.
#' @param tcoutfile Output: Name of file containing the average properties of
#' \emph{Terrain Components}.
#' @param lucontainstcoutfile Output: Name of file containing information on which
#' LU contains which TCs.
#' @param tccontainssvcoutfile Output: Name of file containing information on which
#' TC contains which SVCs.
#' @param terraincomponentsoutfile Output: Name of file containing general
#' properties of TCs.
#' @param recl_lu Output: Name of GRASS reclassification file: EHA -> LU.
#' @param saved_clusters Output: Name of R file that can be used to store LU
#' characteristics for later re-use; set to NULL to omit output (default).
#' @param classify_type Type of classification:\cr
#' ' ': (default) unsupervised classification, no \code{saved_clusters} will be produced\cr
#' 'save': do unsupervised classification and save cluster centers to \code{saved_clusters}
#' for future supervised classification\cr
#' 'load': load cluster centers from existing file and classify according
#' to these clusters (e.g. supervised classification). CURRENTLY NOT SUPPORTED!
#' @param seed Integer specifying seed for random processes in cluster analysis.
#' @param resolution Integer specifying resolution of profiles/spacing of samples.
#' Typically the resolution of your GRASS location used for \code{\link[lumpR]{area2catena}}.
#' @param max_com_length Integer defining the maximum common length of profiles,
#' i.e. the maximum number of support points representing each catena during the
#' classification procedure. Too large values consume more memory and computational
#' effort.
#' @param com_length Integer giving common length of profiles, i.e. the number of
#' support points representing each catena during the classification procedure.
#' Too large values consume more memory and computational effort. Overwrites
#' max_com_length.
#' @param make_plots logical; if \code{TRUE}, visualisations of the classification
#' results are written into sub-directory \emph{plots_prof_class}. WARNING: Consumes
#' a lot of processing time and memory. Default: \code{FALSE}.
#' @param eha_subset NULL or integer vector with subset of EHA ids that shall
#' be processed (for debugging and testing).
#' @param eha_blacklist NULL or integer vector with subset of EHA ids that will
#' be excluded (use this for manual exclusion of strange profiles).
#' @param overwrite \code{logical}. Shall output of previous calls of this function be
#' deleted? If \code{FALSE} the function returns an error if output already exists.
#' Default: \code{FALSE}.
#' @param silent \code{logical}. Shall the function be silent (also suppressing warnings)?
#' Default: \code{FALSE}.
#' @param plot_silhouette \code{logical}. Shall a silhouette plot (illustrating the clustering
#' process) be generated? Consumes much memory and processing time and should be disabled
#' if a memory error is thrown. Will be \code{FALSE} if \code{make_plots = FALSE}.
#' Default: \code{TRUE}.
#'
#' @return Function returns nothing. Output files are written into output directory
#' as specified in arguments.
#'
#' @note Function uses output of \code{\link[lumpR]{area2catena}}. However, no GRASS
#' session needs to be started in this case.
#'
#' After applying \code{recl_lu}, the resulting landscape units raster map in your GRASS
#' location might show gaps, depending on the number of generated landscape units,
#' as each landscape unit refers to its representative EHA. The gaps can be filled
#' with the GRASS function \code{r.grow}.
#'
#' In case of \bold{long computation times or memory issues}, try \code{make_plots = FALSE}
#' and use an RData file as \code{catena_file} (to be specified already in
#' \code{\link[lumpR]{area2catena}}).
#'
#' @details This function first resamples the catenas derived from \code{\link[lumpR]{area2catena}}
#' to a common length (\code{com_length} or the median number of support points
#' of all catenas but not more than \code{max_com_length}). Second, k-means clustering
#' is employed to group the catenas into representative \emph{Landscape Units}
#' according to parameters given via \code{catena_head_file} taking into account
#' hillslope length, shape, and supplemental properties. Third, each group is further
#' partitioned into a given number of \emph{Terrain Components} in a way that the
#' variance within each TC is minimized considering slope gradient and supplemental
#' information.
#'
#' @references Source code based on \code{SHELL} and \code{MATLAB} scripts of Till Francke.
#'
#' lumpR package introduction with literature study and sensitivity analysis:\cr
#' Pilz, T.; Francke, T.; Bronstert, A. (2017): lumpR 2.0.0: an R package facilitating
#' landscape discretisation for hillslope-based hydrological models.
#' \emph{Geosci. Model Dev.}, 10, 3001-3023, doi: 10.5194/gmd-10-3001-2017
#'
#' Theory of LUMP:\cr
#' Francke, T.; Guentner, A.; Mamede, G.; Mueller, E. N. and Bronstert, A (2008):
#' Automated catena-based discretization of landscapes for the derivation of
#' hydrological modelling units. \emph{International Journal of Geographical
#' Information Science, Informa UK Limited}, 22(2), 111-132, DOI: 10.1080/13658810701300873
#'
#' @author Tobias Pilz \email{tpilz@@uni-potsdam.de}, Till Francke \email{francke@@uni-potsdam.de}
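#'
#' @examples
#' \dontrun{
#' # Minimal usage sketch (not run): file names and parameter values below are
#' # illustrative assumptions, not shipped test data; use the outputs of your
#' # own area2catena() call.
#' prof_class(
#'   catena_file      = "rstats.txt",      # mean catena info from area2catena()
#'   catena_head_file = "rstats_head.txt", # header file, adjusted manually
#'   dir_out          = "lump_out",
#'   resolution       = 30,                # resolution of the GRASS location [m]
#'   max_com_length   = 20,
#'   make_plots       = FALSE
#' )
#' }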
prof_class <- function(
### INPUT ###
catena_file=NULL,
catena_head_file=NULL,
svc_column='svc',
### OUTPUT ###
dir_out="./",
luoutfile="lu.dat",
tcoutfile="tc.dat",
lucontainstcoutfile="lucontainstc.dat",
tccontainssvcoutfile="r_tc_contains_svc.dat",
terraincomponentsoutfile="terraincomponents.dat",
recl_lu="reclass_lu.txt",
saved_clusters=NULL,
### PARAMETERS ###
seed=1312,
resolution=NULL,
classify_type=' ',
max_com_length=NULL,
com_length=NULL,
make_plots=F,
eha_subset=NULL,
eha_blacklist=NULL,
overwrite=F,
silent=F,
plot_silhouette=T
) {
### PREPROCESSING ###----------------------------------------------------------
if(!silent) message("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
if(!silent) message("% START prof_class()")
if(!silent) message("%")
if(!silent) message("% Initialise function...")
# checks #---------------------------------------------------------------------
# check output directory
if (!overwrite & ( file.exists(paste(dir_out,luoutfile,sep="/")) |
file.exists(paste(dir_out,tcoutfile,sep="/")) |
file.exists(paste(dir_out,lucontainstcoutfile,sep="/")) |
file.exists(paste(dir_out,tccontainssvcoutfile,sep="/")) |
file.exists(paste(dir_out,terraincomponentsoutfile,sep="/")) |
file.exists(paste(dir_out,recl_lu,sep="/")) ) )
stop(paste0("In output directory '", dir_out, "' some or all output file(s) already exist!"))
if (make_plots) {
if(length(dir(paste(dir_out, "plots_prof_class", sep="/"))) != 0)
{
if(overwrite)
file.remove(dir(paste(dir_out, "plots_prof_class", sep="/"), full.names = T))
else
stop(paste0("Output directory for plots '", dir_out, "/plots_prof_class/' is not empty!"))
}
dir.create(paste(dir_out, "plots_prof_class", sep="/"), recursive=T, showWarnings = F)
}
# argument checks
if(is.null(catena_file) || !file.exists(catena_file))
stop("'catena_file' has not been specified or does not exist!")
if(is.null(catena_head_file) || !file.exists(catena_head_file))
stop("'catena_head_file' has not been specified or does not exist!")
if(!is.numeric(resolution))
stop("Resolution of the raster used to produce 'catena_file' and 'catena_head_file' needs to be given (as numeric)!")
if(is.null(max_com_length) & is.null(com_length))
stop("Either parameter 'max_com_length' or 'com_length' has to be given!")
if(!classify_type %in% c(' ', 'save'))
stop("'classify_type' must be ' ' or 'save' ('load' is currently not supported).")
# supress warnings in silent mode
if(silent){
tmp_file2 <- file(tempfile(), open="wt")
sink(tmp_file2, type="message")
oldw <- getOption("warn")
options(warn = -1)
}
if(!silent) message("% OK")
### CALCULATIONS ###-----------------------------------------------------------
tryCatch(
{
if(!silent) message("%")
if(!silent) message("% Load and prepare data...")
# import and prepare data #----------------------------------------------------
# output dir
dir.create(dir_out, recursive=T, showWarnings=F)
# horizontal resolution of profiles/spacing of samples
if (!(is.numeric(resolution) && is.finite(resolution) && (resolution>0)))
stop("Argument 'resolution' must be a positive number.")
dx <- resolution
# separator in outfiles
tab <- "\t"
# LOAD INPUT #
# load stats header
headerdat <- as.matrix(read.table(catena_head_file, header=T))
# specification of number of columns used by each attribute
datacolumns <- headerdat[1,]
# relative weight of each attribute (supplemental data) to be used in classification
attr_weights_class <- headerdat[2,]
# relative weight of each attribute (supplemental data) to be used in partition (terrain component decomposition)
attr_weights_partition <- headerdat[3,]
if (attr_weights_partition[1] < 1) {
message(paste('% -> WARNING: number of TCs will be set to 2 instead of ', attr_weights_partition[1], ' as specified in catena_head_file', sep=""))
attr_weights_partition[1] <- 2
}
# store the names of the attributes
attr_names <- colnames(headerdat)
# determine type of classification from catena_head_file
if (attr_weights_class[1] < 0) {
cf_mode <- 'successive' # classification performed to specified number of classes for each attribute (option 2)
attr_weights_class <- abs(attr_weights_class)
# only for nice output in loop
attr_names[1] <- 'shape'
attr_names[2] <- 'xy-extent'
} else {
cf_mode <- 'singlerun' # classification performed in single run for all classes using the specified weighting factors (option 1)
warning("cf_mode='singlerun' is experimental. Please consider 'successive' by adding a '-' before the first number of line 8 in rstats_head.txt")
}
# number of classes to divide the dataset into
nclasses <- attr_weights_class[1]
# number of TCs to be created in each LU
ntc <- attr_weights_partition[1]
# if supervised classification using saved clusters is to be used
# com_length <- -1
# load standard catena data
if(grepl(".RData$", catena_file)) {
load(catena_file)
stats <- logdata[,1,drop=F]
} else {
stats <- scan(catena_file, nlines = 1, what=numeric(), sep = "\t", quiet = TRUE) #read first line only
stats <- read.table(file = catena_file, colClasses = c("numeric", rep("NULL", length(stats)-1)), sep = "\t") #read first column only
}
profpoints <- table(stats[,1]) #count number of points of each catena
p_id_unique = unique(stats[,1]) #get unique IDs
rm(stats)
if (!is.null(eha_subset))
{
if(!silent) message("% -> WARNING: Using only a subset as specified in the argument 'eha_subset'.")
p_id_unique = intersect(p_id_unique, eha_subset)
if (length(p_id_unique)==0)
stop("Specified 'eha_subset' not found.")
}
if (!is.null(eha_blacklist))
{
if(!silent) message("% -> WARNING: Excluding a subset as specified in the argument 'eha_blacklist'.")
p_id_unique = setdiff(p_id_unique, eha_blacklist)
if (length(p_id_unique)==0)
stop("No profiles remaining after exclusion. Check 'eha_blacklist'")
}
n_profs = length(p_id_unique)
# use the median of sampling points as the desired common length of profiles
if (classify_type != 'load') {
if (is.null(com_length)) #set com_length, if not specified from outside
{
# common number of points for re-sampled hillslopes
com_length <- max(ntc, round(median(profpoints))) #use at least as many sampling points as requested TCs
com_length <- min(com_length, max_com_length)
}
} # otherwise, the resolution from the saved clusters is used
n_suppl_attributes = max(0, length(datacolumns) - 3) #number of supplemental attributes
n_supp_data_columns <- sum(datacolumns[4:length(datacolumns)]) #number of respective columns
# allocate new matrix for storing resampled profiles (hillslopes)
# for each profile (rows) com_length elevation points, the profile length, the
# profile height, and supplemental data is stored in one long vector
profs_resampled_stored <- matrix(NA, nrow=n_profs, ncol=com_length+2+com_length*n_supp_data_columns)
# PLOT original profile
if (make_plots) {
pdf(paste(dir_out, "plots_prof_class/plots_prof_class.pdf", sep="/"))
plot(1,1,type="n", xlim=c(0,max(profpoints)*resolution), ylim=c(0,500),
main="Original catenas", xlab="horizontal length [m]", ylab="elevation [m]")
}
#read and resample profiles (done at the same time to avoid duplicates in memory)
# TODO: This mess needs to be improved!
if(!grepl(".RData$", catena_file)) testcon <- file(catena_file,open="r")
if(!silent) #for printing progress indicator
pb <- txtProgressBar(min = 0, max = length(p_id_unique), style = 3)
i=0 #counter for valid profiles read
total_read = 0 #counter for total profiles read
while (i < length(p_id_unique))
{
i=i+1
if (!silent) #next progress message
setTxtProgressBar(pb, i)
cur_p_id = p_id_unique[i] #id of profile to be loaded
p_pos = which(names(profpoints)==cur_p_id) #position of current profile in list
skiplines = sum(profpoints[(total_read+1) : p_pos]) - profpoints[p_pos] #compute number of lines to skip to reach the next selected profile
if(grepl(".RData$", catena_file)) {
tt <- as.matrix(logdata[which(logdata[,1] == cur_p_id),])
} else {
tt = readLines(con = testcon, n = skiplines)
tt = scan(file=testcon, what=numeric(), nlines = profpoints[p_pos], quiet = TRUE) #read single profile
tt = matrix(tt, nrow = c(profpoints[p_pos]), byrow = TRUE) #reshape matrix
}
total_read = p_pos #we are now just at the desired profile
if (any(tt[,1]!= cur_p_id)) stop(paste0("Format error in ",catena_file))
if (make_plots)
lines(tt[,2]*resolution, tt[,3])
if (profpoints[p_pos] < 2) { #catena too short, ignore
if(!silent) message(paste('% -> WARNING: profile ', paste(cur_p_id, collapse=", "), ' contains only one point. Skipped.', sep=""))
profs_resampled_stored[i,] = NA
next #skip resampling, approx() needs at least two support points
}
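# resample all attribute columns (elevation and supplemental data) to com_length
# support points via linear interpolation over the original point indices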
p_resampled <- apply(tt[,-(1:2)], MARGIN = 2, FUN=function(y) approx(x = tt[,2]-1, y, xout = 0:(com_length-1)/(com_length-1)*(profpoints[p_pos]-1))$y)
# set foot of profile to zero elevation
p_resampled[,1] = p_resampled[,1] - p_resampled[1,1]
# amplitude of profile will be scaled to 1
d <- max(p_resampled[,1])
if(d==0) {
if(!silent) message(paste('% -> WARNING: Profile ', cur_p_id, ' has no elevation gain (runs flat)', sep=""))
p_resampled[,1] <- 0
} else {
# normalize profile in elevation (ie. normally, top end at elevation = 1)
p_resampled[,1] <- p_resampled[,1] / d
}
prof_length = (profpoints[p_pos]-1) * resolution #compute original length of catena
prof_height = max(tt[,3]) - tt[1,3] #compute original elevation gain of catena
# append the dimension components, unweighted
profs_resampled_stored[i,1:(com_length+2)] <- c(p_resampled[,1], prof_length, prof_height)
# treat supp_data if present (resample, weigh and add to profile vector to be included in cluster analysis)
if (n_suppl_attributes>0)
{
#check supplemental data for this profile
p_supp <- tt[,-(1:3), drop=FALSE] #supplemental data columns of this profile
all_na <- apply(p_supp, 2, function(x) all(is.na(x)))
if (any(all_na))
{
if (make_plots)
dev.off() #close PDF output
na_attributes = names(datacolumns[-(1:3)])[unique(sapply (X = which(all_na), function(x) min(which(cumsum(datacolumns[-(1:3)]) >= x))))] #names of attributes with all NAs
stop(paste0("Error: EHA ", cur_p_id," has only NAs for attribute(s) ", paste0(na_attributes, collapse=", "),". Most likely a result of insufficient map coverage. Fix coverage, remove this attribute or replace NAs manually."))
}
profs_resampled_stored[i,(com_length+3):(com_length+2+com_length*n_supp_data_columns)] <- as.vector(p_resampled[,-1])
}
}
if(!silent) # close progress bar
close(pb)
if(grepl(".RData$", catena_file)) {
rm(logdata)
} else {
close(testcon)
}
# remove not needed objects to save memory
rm(list = c("p_supp", "p_resampled", "tt"))
gc(verbose = F);gc(verbose = F)
#save(file="debug.RData", list = ls(all.names = TRUE)) #for debugging only
# PREPARE attribute loop and key-generation #
# START OF CLASS KEY GENERATION #
attr_weights_class_original <- attr_weights_class
iw <- 1
# successive weighting vs. single run: set number of times the classification loop has to be run
if (cf_mode == 'successive') {
iw_max <- length(attr_weights_class) # successive weighting for each single attribute
} else {
iw_max <- 1 # cf_mode ='singlerun'
}
# PLOT modified (resampled) profiles
if (make_plots) {
plot(1,1,type="n", xlim=c(0,com_length-1), ylim=c(0,1),
main="catenas, resampled, reduced & normalized", xlab="catena point number", ylab="relative elevation")
for (i in 1:nrow(profs_resampled_stored)) {
lines(0:(com_length-1), profs_resampled_stored[i,1:com_length])
}
}
if(!silent) message(paste0("% OK, ",length(p_id_unique)," profiles loaded."))
# classification of ehas (clustering) #----------------------------------------
if(!silent) message("%")
if(!silent) message("% Clustering of EHAs...")
cidx_save <- list(NULL) # initialise list to store classification results; i.e. a vector assigning each EHA to a cluster class for each attribute
hc <- 0
while (iw <= iw_max) {
# ensure reproducible random numbers for debugging / repetitions
set.seed(seed)
# SUCCESSIVE weighting for each single attribute
if (cf_mode == 'successive') {
emptymask <- vector("numeric", length=length(attr_weights_class_original))
# first run > shape is considered
if (iw==1) {
maskw <- emptymask
maskw[1] <- 1
} else if (iw==2) { # second run > x/y dimension is considered
maskw <- emptymask
maskw[2] <- 1
maskw[3] <- attr_weights_class_original[3]
} else if (iw>3) { # next runs > weights of all other parameters are set to zero, consideration of single attribute
maskw <- emptymask
maskw[iw] <- 1
}
# modify original weights:
attr_weights_class <- maskw
# set nclass to specification in header file
nclasses <- attr_weights_class_original[iw]
# in case of erroneous input zero or only 1 class, don't do classification, just append the result of classifying into 1 class
if (nclasses==0 || nclasses==1) {
cidx_save[[iw]] <- rep(1, n_profs)
if(!silent) message(paste("% -> skipped '", attr_names[iw], "' (", iw-(iw>2), "/", length(attr_names)-1, ") because number of classes=1", sep=""))
if (iw==2) {
iw <- 4
} else {
iw <- iw+1 # alter index for attribute-loop
}
next
}
if(!silent) message(paste("% -> successive clustering loop, treating attribute '", attr_names[iw], "' (", iw-(iw>2), "/", length(attr_names)-1, ")", sep=""))
hc <- hc+1
} # end cases of cf_mode 'successive'/'singlerun'
# auxiliary vector for computing required array size for pre-allocation
t_help <- rep(1,length(datacolumns))*com_length
t_help[2:3] <- 1 # dimension attributes need only one value
#ii: isn't this a copy of profs_resampled_stored, which is then weighted? Do we need this duplication, or
#couldn't we weight and "unweigh" (if necessary later) the same instance to save memory?
profs_resampled <- matrix(0, nrow=n_profs, ncol=sum(t_help*datacolumns*(attr_weights_class!=0)))
# do WEIGHTING for all profiles according to current weighting scheme
for (i in 1:n_profs) {
dest_column <- 1 # destination column where an attribute is placed
# weigh shape and the dimension components with the specified weights attr_weights_class[2] and attr_weights_class[3], multiplied by com_length to make the weighting independent of whatever com_length was computed; TODO: don't understand this
# put into data matrix only if the weighting factors are not zero
if (attr_weights_class[1]) {
profs_resampled[i,dest_column:(dest_column+com_length-1)] <- profs_resampled_stored[i,1:com_length]*attr_weights_class[1]
dest_column <- dest_column+com_length
}
if (attr_weights_class[2]) {
prof_length = profs_resampled_stored[i,com_length+1] # real length of profile
profs_resampled[i,dest_column] <- prof_length*com_length*attr_weights_class[2]
dest_column <- dest_column+1
}
if (attr_weights_class[3]) {
prof_height = profs_resampled_stored[i,com_length+2] # real height of profile
profs_resampled[i,dest_column] <- prof_height*com_length*attr_weights_class[3]
dest_column <- dest_column+1
}
#ii: isn't this necessary only for cf_mode != 'successive'? Even in successive, wouldn't it be enough to do it once?
#treat supp_data if present (resample, weigh and add to profile vector to be included in cluster analysis)
if (n_suppl_attributes) {
attr_start_column <- 1+com_length+2 #initial value for first loop
# append all the supplemental components, weighted with specified weights
for (j in 4:length(datacolumns)) {
# skip attributes with no data columns
if(datacolumns[j]==0) next
new_columns <- datacolumns[j]*com_length-1
# skip attributes weighted with 0
if(attr_weights_class[j]==0) {
attr_start_column <- attr_start_column+new_columns+1
next
}
attr_end_column <- attr_start_column+new_columns
# Weigh the current supplemental attribute, divide by the
# number of fields (attr_end_column-attr_start_column+1) to
# prevent multi-field attributes from getting more relative weight
profs_resampled[i,dest_column:(dest_column+new_columns)] <- profs_resampled_stored[i,attr_start_column:attr_end_column] * attr_weights_class[j] / (attr_end_column-attr_start_column+1)
dest_column <- dest_column+new_columns
# the next attribute starts one column further in profs_resampled_stored
attr_start_column <- attr_end_column+1
} # end weigh and append suppl data
} # end treat suppl data (nothing to append when no supplemental attributes are present)
} # end weighting
# CLUSTER-ANALYSIS
cidx=array(NA,nrow(profs_resampled)) #cluster membership
if (classify_type=='load') {
# classification based on loaded classes, TODO
#[cidx,sumd,dists]=cluster_supervised(profs_resampled_stored,nclasses,cluster_centers_weighted);
stop("not yet implemented")
} else {
# unsupervised classification
dups = duplicated(profs_resampled)
if (nrow(profs_resampled) - sum(dups) <= nclasses) #not enough distinct profiles for this attribute
{
kmeans_out=NULL #disables later plots
for(jj in 1:nrow(profs_resampled))
cidx[jj]=which(apply(profs_resampled[!dups,, drop=FALSE], MARGIN=1, FUN=identical, profs_resampled[jj,]))
cmeans2 <- profs_resampled[!dups,, drop=FALSE] # matrix of cluster centers (the distinct profiles)
sumd <- 0 # within-cluster sum of squares, one per cluster
} else
{ #regular case
kmeans_out <- kmeans(profs_resampled, centers=nclasses, nstart=10)
cidx <- kmeans_out$cluster # cluster number for each point
cmeans2 <- kmeans_out$centers # matrix of cluster centers
sumd <- kmeans_out$withinss # within-cluster sum of squares, one per cluster
}
}
# store classification result of this treated attribute
cidx_save[[iw]] <- cidx
if(!silent) message(paste('% -> profile clustering: fitting index_c = ', round(sqrt(sum(sumd^2)),2), sep=""))
if (length(unique(cidx)) < nclasses) {
if(!silent) message(paste("% -> WARNING: ", nclasses-length(unique(cidx)), ' empty clusters produced.', sep=""))
} else if (make_plots) {
# silhouette plot, doesn't work with empty clusters
if (!is.null(kmeans_out) & plot_silhouette)
{
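# daisy() and silhouette() are provided by the 'cluster' package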
dists <- daisy(profs_resampled) # compute pairwise distances, TODO: see warnings
plot(silhouette(kmeans_out$cluster, dists^2), main=attr_names[iw]) # plot silhouette
} else
dists <- matrix(-9999, nrow=n_profs, ncol=nclasses) #dummy, no distance computed
}
# PLOT classified catenas
# originals
if (make_plots) {
plot(1,1,type="n", xlim=c(0,max(profs_resampled_stored[,com_length+1])), ylim=c(0,max(profs_resampled_stored[,com_length+2])),
main=paste("Original catenas\nclassified according ", attr_names[iw], sep=""), xlab="horizontal length [m]", ylab="elevation [m]")
for (i in 1:n_profs) {
lines( (0:(com_length-1) / (com_length-1)) * profs_resampled_stored[i,com_length+1],
profs_resampled_stored[i,1:com_length] * profs_resampled_stored[i,com_length+2], col=cidx[i])
}
}
# modified
if (make_plots) {
plot(1,1,type="n", xlim=c(0,com_length-1), ylim=c(0,1), main=paste("catenas, resampled, reduced & normalized\nclassified according ", attr_names[iw], sep=""),
xlab="catena point number", ylab="relative elevation")
for (i in 1:nrow(profs_resampled_stored)) {
lines(0:(com_length-1), profs_resampled_stored[i,1:com_length], col=cidx[i])
}
}
# alter index for attribute-loop
if (iw==2) {
iw <- 4 # because x and y dimension are treated together
} else {
iw <- iw+1
}
} # end classification loop through all attributes
# complete key generation (successive mode only)
# successive weighting mode: all prior classifications will be merged into one by generating unique key
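# e.g. class memberships 2 (shape), 3 (extent) and 1 (a suppl. attribute) are
# combined digit-wise into the composite key (2*10 + 3)*10 + 1 = 231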
if (cf_mode == 'successive') {
cidx_save[[iw_max+1]] <- 0 #initialize overall (composite) classification result
for (iz in 1:iw_max) {
# eliminate empty iw=3 (horiz. & vertical extent treated together) by jumping to next entry
if (iz==3) next
# generate key from previous classifications
cidx_save[[iw_max+1]] <- cidx_save[[iw_max+1]]*10 + cidx_save[[iz]] #attention: this will be faulty when more than 10 classes have been chosen for any attribute, fix this
}
nclasses <- length(unique(cidx_save[[iw_max+1]]))
attr_weights_class_original[iw+1] <- nclasses
# "pretend" this is the actual classification
cidx <- cidx_save[[iw_max+1]]
# quick and dirty computation of distance matrix to allow the rest of the script to be run without problems
dists <- matrix(1, nrow=n_profs, ncol=nclasses)
unique_classes <- unique(cidx)
for (ii in 1:nclasses) {
# find all profiles belonging to current class
class_i <- which(cidx==unique_classes[ii])
# set their distance to the cluster centre to 0
dists[class_i,ii] <- 0
}
} # end key generation
if(!silent) message("% OK.")
# calculation of mean catena for each cluster #--------------------------------
if(!silent) message("%")
if(!silent) message("% Calculate mean catena for each cluster...")
unique_classes <- unique(cidx)
if (length(unique_classes)!=nclasses) {
if(!silent) message(paste('% -> WARNING: Number of generated classes (', length(unique_classes), ') is not as expected (', nclasses, '). Too few EHAs, too many classes requested, too few differences in EHAs? Please check what happened.'))
nclasses <- length(unique_classes)
}
mean_prof <- matrix(NA, nrow=nclasses, ncol=ncol(profs_resampled_stored)) # mean shape of every class
class_repr <- matrix(NA, nrow=nclasses, ncol=2) # min. distance of class i to centroid and resp. ID
for (i in 1:nclasses) {
# find all profiles belonging to current class
class_i <- which(cidx==unique_classes[i])
# empty cluster
if (!any(class_i)) {
if(!silent) message(paste('% -> WARNING: cluster ', i, ' is empty.', sep=""))
next
}
# calculate mean catena (average attributes over all profiles belonging to the current class)
mean_prof[i,] <- apply(profs_resampled_stored[class_i,, drop=F],2,mean)
# find closest catena (=most representative) to each class centre
dists_class_i <- dists[class_i,i] # retrieve distances of catenas of class i to centroid of class i
class_repr[i,2] <- min(dists_class_i) # find minimum distance of class i
j <- min(which(dists_class_i == min(dists_class_i)))
class_repr[i,1] <- class_i[j] # store internal id of closest catena
# TODO: what is this good for?!
Y <- sort(dists_class_i)
ix <- sort(dists_class_i, index.return=T)$ix
if (cf_mode=='singlerun') {
if(!silent) message(paste('% -> WARNING: three closest catenas to centre of class ', i, ' (ext id): ', paste(p_id_unique[class_i[ix[1:min(3,length(ix))]]], collapse=", "), sep=""))
}
# draw a separate figure with the cluster centre (mean toposequence) and the closest catena
if (make_plots && FALSE) {
xvec <- c(0:(com_length-1))/(com_length-1)*mean_prof[i,com_length+1]
yvec <- mean_prof[i,1:com_length] * mean_prof[i,com_length+2]
plot(xvec, yvec, type="l", main=paste('mean toposequence and closest catena\nclass ', i, sep=""),
xlab='horizontal length', ylab='relative elevation gain')
lines(profsx[[class_i[j]]], profs[[class_i[j]]]-min(profs[[class_i[j]]]), col="blue")
legend("topleft", c("mean toposequence", "closest catena"), col=c("black", "blue"), lty=c(1,1))
}
# save reclass files
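# each line maps one EHA id to its LU number and composite key, e.g. "1234=3 231"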
write(file=paste(dir_out,recl_lu,sep="/"), append=ifelse(i==1,FALSE,TRUE), x=paste(p_id_unique[class_i], "=",i," ", unique_classes[i], sep=""))
# TODO: sql class file -> is that needed?
} # end calc mean catena of each class
# in reclass files: all other (unclassified) profiles are assigned nodata
write(file=paste(dir_out,recl_lu,sep="/"), append=T, x="* = NULL")
#---------prepare file output luoutfile
# prepare header line for output file
dumstr <- paste('LU-ID', tab, 'closest_catena', tab, 'closest_dist', tab, 'x_length', tab, 'y_length', sep="")
# write header fields for calculated TC-limits, minimisation of variance
if (ntc > 1) {
for (i in 1:(ntc-1)) {
dumstr <- paste(dumstr, tab, 'lim_var', i, sep="")
}
# write header fields for calculated TC-limits, cluster analysis
for (i in 1:(ntc-1)) {
dumstr <- paste(dumstr, tab, 'lim_clu', i, sep="")
}
}
# write header for all attributes
for (i in 3:length(datacolumns)) {
# write all fields of this attribute for this catena point
for (k in 1:datacolumns[i]) {
# write this attribute for all catena points
for (j in 1:com_length) {
dumstr <- paste(dumstr, tab, attr_names[i], '_p', j, sep="")
# if this is a multi-field attribute...
if (datacolumns[i]>1) dumstr <- paste(dumstr, '_c', k, sep="") # denote field numbering in header
}
}
}
# write header string to output file
write(file=paste(dir_out,luoutfile,sep="/"), append=F, x=dumstr)
#---------end prepare file output
#---------prepare file output tc.dat
dumstr <- 'TC'
# write header for all attributes
for (i in 4:length(datacolumns)) {
# write all fields of this attribute for this catena point
for (k in 1:datacolumns[i]) {
dumstr <- paste(dumstr, tab, attr_names[i], sep="")
# if this is a multi-field attribute...
if (datacolumns[i]>1) dumstr <- paste(dumstr, '_c', k, sep="") # denote field numbering in header
}
}
# write header string to output file tc.dat
write(file=paste(dir_out,tcoutfile,sep="/"), append=F, x=dumstr)
#---------end prepare file output tc.dat
#---------prepare file output r_tc_contains_svc
svc_col_index <- 0
# find index of column containing svcs
for (j in 1:length(attr_names)) {
if (attr_names[j]==svc_column) {
svc_col_index <- j
break
}
}
if (svc_col_index) {
#write header string to output file r_tc_contains_svc
write(file=paste(dir_out,tccontainssvcoutfile,sep="/"), append=F, x='tc_id\tsvc_id\tfraction')
svc_recl_file <- paste('reclass_', attr_names[svc_col_index], '.txt', sep="")
# check existence of file containing original SVC-IDs
if (file.exists(svc_recl_file)) {
svc_recl_dat <- read.table(svc_recl_file, header=T)
mod_svc_ids <- svc_recl_dat$new_id
org_svc_ids <- svc_recl_dat$original_id
} else {
warning(paste0(svc_recl_file, " not found. SVC-ids may have changed, please check."))
# no reclass file found - don't change IDs
mod_svc_ids <- 1:datacolumns[svc_col_index]
org_svc_ids <- mod_svc_ids
}
}
#---------end prepare file output r_tc_contains_svc
if(!silent) message("% OK.")
# decomposition into TCs #-----------------------------------------------------
if(!silent) message("%")
if(!silent) message("% Decomposition into TCs...")
cluster_centers <- matrix(NA, nrow=nclasses, ncol=ncol(profs_resampled_stored)) # initialise matrix for cluster centers for each class
lu_contains_tc <- NULL
if (attr_weights_partition[1]==1) {
if(!silent) message('% -> NOTE: only one TC per LU will be produced!')
}
# save original mean_prof (for later plotting only)
mean_prof_orig_t <- mean_prof
#create labels for LUs
lu_labels=NULL #labels for LUs consisting of appended class memberships for each attribute
for (i in 1:nclasses) {
class_i <- which(cidx==unique_classes[i])
curr_lu_key <- unique(cidx[class_i])
lu_labels=c(lu_labels, curr_lu_key) #ii
}
# PARTITIONING OF MEAN PROFILE FOR EACH LU #
for (i in 1:nclasses) {
if(!silent) message(paste('% -> partitioning class ', i, ' of ', nclasses, sep=""))
# find all profiles belonging to current class
class_i <- which(cidx==unique_classes[i])
# empty cluster
if (!any(class_i)) {
cluster_centers[i,] <- NaN
next
}
# get the ID of the currently treated LU
curr_lu_key <- unique(cidx[class_i])
if(length(curr_lu_key) > 1)
stop("Unexpected error during partitioning of the mean profile for each LU: more than one LU identified in partitioning loop. Contact the package developer!")
# if supplemental data exists, calculate average
if (n_suppl_attributes) {
mean_supp_data_t <- apply(profs_resampled_stored[class_i,(com_length+3):ncol(profs_resampled_stored), drop=F],2, mean)
mean_supp_data <- matrix(mean_supp_data_t, ncol=com_length, nrow=n_supp_data_columns, byrow=T)
} else {
mean_supp_data <- 0 # no supplemental data present
}
# compute x-dimension for current mean profile
xvec <- c(0:(com_length-1))/(com_length-1)*mean_prof[i,(com_length+1)]
# cluster centers can be saved for future use (supervised classification)
if (classify_type=='save') {
tmp_v=NULL
# store data for all supplemental attributes
for (ii in 4:length(datacolumns)) {
# for retrieval of this attribute
attr_start_column <- 1+sum(datacolumns[4:(ii-1)])
attr_end_column <- attr_start_column+datacolumns[ii]-1
# for all points in profile
for (jj in 1:com_length) {
tmp_v <- c(tmp_v, mean_supp_data[attr_start_column:attr_end_column,jj])
}
}
cluster_centers[i,] <- c(mean_prof[i,1:(com_length+2)], tmp_v)
} # end if classify_type==save
# TERRAIN COMPONENTS
# get exact horizontal resolution for exact plotting
dx <- xvec[com_length]/(com_length-1)
if (attr_weights_partition[1]==1) {
# only one TC per LU has to be produced - not much work to be done
lim_var=NULL
lim_clu=NULL
} else {
# decompose profile into terrain components
# [lim_var,lim_clu] = tc_decomp(mean_prof(i,1:com_length), mean_supp_data, datacolumns, attr_weights_partition, xvec,monocrome,plot_approx_ts);
# tc_decomp(prof, supp_data, datacolumns, attr_weights_partition, xvec, monocrome, plot_approx_ts)
# fill hollows/sinks
for (ii in 2:com_length) {
mean_prof[i,1:com_length][ii] <- max(mean_prof[i,1:com_length][ii-1], mean_prof[i,1:com_length][ii])
}
# compute local slopes of profile
prof_slopes <- vector("numeric", length=com_length-1)
# the first and last point are treated differently
prof_slopes[1] <- (mean_prof[i,1:com_length][2]-mean_prof[i,1:com_length][1])/dx
for (ii in 2:(com_length-1)) {
prof_slopes[ii] <- 0.5/dx*((mean_prof[i,1:com_length][ii+1]-mean_prof[i,1:com_length][ii])+(mean_prof[i,1:com_length][ii]-mean_prof[i,1:com_length][ii-1]))
}
prof_slopes[com_length] <- (mean_prof[i,1:com_length][com_length]-mean_prof[i,1:com_length][com_length-1])/dx
# if supplemental data is present
if (n_suppl_attributes>0 && any(attr_weights_partition[4:length(attr_weights_partition)]>0))
{
supp_data_weighted <- NULL
supp_data_weighted <- array(0, dim(mean_supp_data))
# weigh supplemental information according to weighting factors given
for (jj in 4:length(datacolumns)) {
# if an attribute is to be weighted with 0, we can as well skip it
if (attr_weights_partition[jj]==0) next
attr_start_column <- sum(datacolumns[4:(jj-1)])+1
attr_end_column <- attr_start_column+datacolumns[jj]-1
supp_data_weighted[attr_start_column:attr_end_column,] <- mean_supp_data[attr_start_column:attr_end_column,]*attr_weights_partition[jj]/(attr_end_column-attr_start_column+1)
}
# data that is given to partitioning algorithm
#remove columns without variability to conserve space and computation time
to_keep=rep(TRUE, nrow(supp_data_weighted))
for (jj in 1:nrow(supp_data_weighted))
to_keep[jj] = any(supp_data_weighted[jj,1] != supp_data_weighted[jj,]) #check if this row contains different values
supp_data_weighted = supp_data_weighted[to_keep,]
pdata <- rbind(prof_slopes, supp_data_weighted)
} else {
# only the slope data is given to partitioning algorithm
supp_data_weighted <- 0
pdata <- matrix(prof_slopes,nrow = 1)
}
# decomposition using min variance
b_part <- best_partition_new(pdata, attr_weights_partition[1])
qual <- b_part[1] # partitioning quality
best_limits <- b_part[-1] # best limits of partitioning
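# best_limits holds the start indices of TCs 2..ntc: e.g. with 3 TCs,
# best_limits = c(5, 12) means TC1 = points 1-4, TC2 = 5-11, TC3 = 12-com_length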
if(!silent) message(paste('% -> partition by min variance: ', paste(best_limits, collapse=" "),
'; fitting index_v = ', qual, sep=""))
#decomposition using cluster analysis - deactivated
best_limits_c <- NULL
best_limits_c[1:(attr_weights_partition[1]-1)] <- 0
if (FALSE){
clusterdata <- pdata # matrix containing data that is fed to cluster analysis
dummy_dim_appended <- 0 # flag if dummy dimension has already been appended
# modify classification data by adding "location" dimensions [1..lengths]
# until the classification produces spatially continuous clusters
for (j in 1:(length(supp_data_weighted)*10+2)) {
# cluster analysis
kmeans_out <- kmeans(clusterdata, centers=attr_weights_partition[1], nstart=5)
cidxtc <- kmeans_out$cluster # cluster number for each point
cmeanstc <- kmeans_out$centers # matrix of cluster centers
fitc <- kmeans_out$withinss # within-cluster sum of squares, one per cluster
best_limits_c <- NULL
continuous_clusters <- 1
# check if clusters are continuous or distributed
for (k in 1:attr_weights_partition[1]){
xvec <- which(cidxtc==k) # find limits
if (!any(xvec)) next
# check if found cluster is connected / undistributed
if (sqrt(sum((xvec - c(xvec[1]:(xvec[1]+length(xvec)-1)))^2)) != 0) {
continuous_clusters <- 0
break
} else if (xvec[1]!=1) {
# the beginning of the profile is not an actual cluster limit, so don't store this
best_limits_c <- c(best_limits_c, xvec[1])
}
}
# continuous clustering achieved?
if (continuous_clusters) {
break
} else {
if (!dummy_dim_appended) {
# append another dummy- / location dimension (scaled to match the order of magnitude of the original data) and do another loop
clusterdata <- c(clusterdata, c(1:length(prof_slopes))/length(prof_slopes)*mean(clusterdata ))
dummy_dim_appended <- 1
} else {
# double the values of the existing dummy dimension to force the creation of continuous clusters
clusterdata[length(clusterdata)] <- clusterdata[length(clusterdata)]*2
}
}
} # end modify classification data
# continuous clustering achieved?
if (!continuous_clusters) {
if(!silent) message('% -> WARNING: partitioning using cluster analysis failed.')
best_limits_c[1:(attr_weights_partition[1]-1)] <- 0
} else {
# sort limits to ascending order
best_limits_c <- sort(best_limits_c)
if(!silent) message(paste('% -> partition by clustering : ', paste(best_limits_c, collapse=" "), '; fitting index_c = ', sqrt(sum((fitc)^2)), sep=""))
# only for drawing - from beginning till end of profile
best_limits_t <- c(1, best_limits_c, length(mean_prof[i,1:com_length]))
xvec <- seq(0,(com_length-1)*dx, by=dx)
# plot
if (make_plots) {
lines(xvec[best_limits_t], mean_prof[i,1:com_length][best_limits_t], col="green")
# plot limits using vertical lines
for (m in 1:length(best_limits_c)) {
lines(c((best_limits_c[m]-1)*dx, (best_limits_c[m]-1)*dx), c(mean_prof[i,1:com_length][best_limits_c[m]], max(mean_prof[i,1:com_length])*1.1), lty=2, col="green")
}
}
}
}
# save values
lim_var <- best_limits
lim_clu <- best_limits_c
} # end else attr_weights_partition[1]==1
# plot orig
if (make_plots) {
plot(xvec, mean_prof_orig_t[i,1:com_length], type="l", xlab='horizontal length', ylab='relative elevation gain',
main=paste("partitioning class ", i, sep=""))
}
# plot filled
if (make_plots) {
points(xvec, mean_prof[i,1:com_length], pch=1)
}
# plot parameterized slope
if (make_plots) {
if(attr_weights_partition[1]==1) {
lines(c(min(xvec), max(xvec)), mean_prof[i,c(1,com_length)], col="red")
} else {
# only for drawing - from beginning till end
best_limits_t <- c(1, best_limits, length(mean_prof[i,1:com_length]))
lines(xvec[best_limits_t], mean_prof[i,1:com_length][best_limits_t], col="red")
# plot limits using vertical lines
for (ii in 1:length(best_limits)) {
lines(c((best_limits[ii]-1)*dx, (best_limits[ii]-1)*dx), c(0, mean_prof[i,1:com_length][best_limits[ii]]), lty=2, col="red")
}
}
}
# plot legend
if (make_plots) {
# legend("topleft", c("orig. toposequence", "filled profile", "approx. (min var)", "TC boundary (var)",
# "approx. (cluster anal.)", "TC boundary (cluster anal.)"), lty=c(1,NA,1,2,1,2),
# pch=c(NA,1,NA,NA,NA,NA), col=c("black", "black", "red","red","green","green"))
# cluster analysis currently not supported and does not need to appear in plots
legend("topleft", c("orig. toposequence", "filled profile", "approx. (min var)", "TC boundary (var)"),
lty=c(1,NA,1,2), pch=c(NA,1,NA,NA), col=c("black", "black", "red","red"))
}
#----------file output lu.dat
# write LU-ID, closest catena and its distance, catena length and relative elevation
lu_out_dat <- c(i, p_id_unique[class_repr[i,1]], class_repr[i,2], round(mean_prof[i,(com_length+1):(com_length+2)],1))
# write limits of TC-decomposition
lu_out_dat <- c(lu_out_dat, lim_var, lim_clu)
# write elevation data
lu_out_dat <- c(lu_out_dat, round(mean_prof[i,1:com_length],2))
# write for all catena points
for (j in 1:nrow(mean_supp_data)) {
# write data string to output file
lu_out_dat <- c(lu_out_dat, round(mean_supp_data[j,],2))
}
# write into file
write(file=paste(dir_out,luoutfile,sep="/"), append=T, ncolumns=length(lu_out_dat), x=lu_out_dat, sep=tab)
#----------end file output
#----------file TC-output
lim_var_t <- c(0.5, lim_var, com_length+0.5)
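# the half-point padding of lim_var_t at both ends makes the fractional TC
# extents computed below sum exactly to 1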
# TC fractions
frac_tc <- vector("numeric", length=ntc)
for (j in 1:ntc) {
frac_tc[j] <- (lim_var_t[j+1]-lim_var_t[j])/com_length
}
lim_var_t[1] <- 1
lim_var_t[length(lim_var_t)] <- com_length
# set start of profile to zero
mean_prof[i,1:com_length] <- mean_prof[i,1:com_length]-mean_prof[i,1]
# TC slopes
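# slope of each TC in percent: elevation gain within the TC (ydist, de-normalised
# by the total profile height) over its horizontal extent (xdist)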
ts <- 0
slope_tc <- NULL
for (j in 1:ntc) {
if (lim_var_t[j+1]==1) {
# partition at first point reaches halfway to next sampling point
ydist <- 0.5*mean_prof[i,2]* mean_prof[i,com_length+2]
} else if (lim_var_t[j]==com_length) {
# partition at last point starts halfway from previous sampling point
ydist <- 0.5*(mean_prof[i,com_length]-mean_prof[i,com_length-1])* mean_prof[i,com_length+2]
} else {
if (lim_var_t[j+1]==com_length && j==(ntc-1)) {
# if the last TC is one sampling point only, the second-to-last ends halfway before last sampling point
ydist <- (0.5*(mean_prof[i,com_length]+mean_prof[i,com_length-1])-mean_prof[i,lim_var_t[j]])* mean_prof[i,com_length+2]
} else {
# normal case
ydist <- (mean_prof[i,lim_var_t[j+1]]-mean_prof[i,lim_var_t[j]])* mean_prof[i,com_length+2]
}
}
xdist <- mean_prof[i,com_length+1] * frac_tc[j]
slope_tc[j] <- 100*ydist/xdist
ts <- ts+ydist
}
# write data fields for all TCs
lu_id <- i
for (j in 1:ntc) {
dumstr <- ''
treated_attribs <- 0
tc_id <- (lu_id-1)*ntc+j # generate a unique number for the TC
from_point <- lim_var_t[j]
till_point <- lim_var_t[j+1]
# write data for all supplemental attributes
for (ii in 4:length(datacolumns)) {
# write all fields of this attribute for this catena point
for (k in 1:datacolumns[ii]) {
tmp_v <- round(mean(mean_supp_data[treated_attribs+k,from_point:till_point]),5)
dumstr <- paste(dumstr, paste(tmp_v,collapse=tab),sep=tab)
#---------output r_tc_contains_svc.dat
# if this is the svc column...
if (ii==svc_col_index) {
# print out SVC fraction, if greater than 0
if (tmp_v) {
# fraction of svc in current tc to output file r_tc_contains_svc
write(file=paste(dir_out,tccontainssvcoutfile,sep="/"), append=T, x=paste(tc_id, org_svc_ids[k], tmp_v, sep=tab))
}
}
#---------end output r_tc_contains_svc.dat
}
# increase counter for attributes already treated
treated_attribs <- treated_attribs+k
}
# write constitution of this TC (tc.dat)
write(file=paste(dir_out,tcoutfile,sep="/"), append=T, x=paste(tc_id, paste(dumstr,collapse=tab), sep=""))
} # end loop over ntc
tc_ids <- (lu_id-1)*ntc + 1:ntc # same numbering scheme as tc_id above
lu_contains_tc <- rbind(lu_contains_tc, cbind(i*rep(1,ntc), tc_ids, round(frac_tc,5), 1:ntc, slope_tc))
#----------end file TC-output
} # end loop over nclasses for TC decomposition
write.table(file=paste(dir_out,"lu_labels.dat",sep="/"), x=data.frame(no=1:nclasses, lu_labels=lu_labels), append=F, row.names=FALSE, quote=FALSE, sep=tab) #label file
# close plot output device
if (make_plots)
dev.off()
# save remaining classification results
# cluster centers can be saved for future use (supervised classification, single run only)
if (classify_type=='save'){
save('cluster_centers','com_length','datacolumns','attr_names',file=paste(dir_out,saved_clusters,sep="/"));
if(!silent) message(paste("% -> NOTE: saved cluster centers to ", dir_out, "/", saved_clusters, sep=""))
}
# write LU contains TC files
colnames(lu_contains_tc) <- c("lu_id", "tc_id", "fraction", "position", "slope")
lu_contains_out <- matrix(lu_contains_tc[,-5], ncol=4, dimnames=list(NULL, colnames(lu_contains_tc)[-5])) # ensure matrix even if only one row exists
write.table(lu_contains_out, file=paste(dir_out,lucontainstcoutfile,sep="/"), quote=F, col.names=T, row.names=F, sep=tab)
tc_dat <- cbind(lu_contains_tc[,2], rep("NA",nrow(lu_contains_tc)), round(lu_contains_tc[,5],2), rep(NA,nrow(lu_contains_tc)), rep(NA,nrow(lu_contains_tc)), rep(NA,nrow(lu_contains_tc)))
colnames(tc_dat) <- c("pid", "description", "slope", "frac_rocky", "beta_fac", "sdr")
write.table(tc_dat, file=paste(dir_out,terraincomponentsoutfile,sep="/"), quote=F, col.names=T, row.names=F, sep=tab)
# stop sinking
closeAllConnections()
# restore original warning mode
if(silent)
options(warn = oldw)
if(!silent) message("% OK.")
if(!silent) message('%')
if(!silent) message("% DONE!")
if(!silent) message("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
# if an error occurs...
} ,
error = function(e) {
# stop sinking
closeAllConnections()
if (make_plots) dev.off() #close open devices
# restore original warning mode
if(silent)
options(warn = oldw)
stop(paste(e))
})
} # EOF
### INTERNAL FUNCTIONS USED BY prof_class ###----------------------------------
# delivers partition quality: sum of weighted variances of subdivisions #------
get_part_quality <- function(data_mat, lim, cur_best=Inf) {
# get number of points contained in this data (sub-)set
n_points_in_data_mat <- ncol(data_mat)
qual <- 0
lim <- c(1, lim, n_points_in_data_mat+1)
# function get_part_quality() in original matlab script
for (iii in 1:(length(lim)-1)) {
part_start <- lim[iii] # start index of current partition
part_end <- max(part_start,lim[iii+1]-1) # end index of current partition
if (part_start==part_end) {
next # this CAN be done because the variance of a subsection that contains only one point is zero
# it MUST be done because the var() function doesn't work as intended on one-column matrices
# if you use an alternative quality formula, remove this 'next'
}
# factor for weighting the variance of this subdivision in the overall variance
wf <- (part_end-part_start)
# sum up weighted variances of subdivisions
w_var = wf*sum(apply(data_mat[,part_start:part_end, drop=FALSE], MAR=1, FUN = var))
qual = qual + w_var
if (qual >= cur_best) break #the current best is already reached or exceeded, don't look any further
}
return(qual)
} # EOF
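# illustrative check (assumed toy data, not part of the package):
# m <- matrix(c(1, 1, 1, 5, 5, 5), nrow = 1)
# get_part_quality(m, 4) # 0: both subsections are constant, so their weighted variances are 0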
# partitioning of a hillslope into Terrain Components #------------------------
best_partition <- function(data_mat, no_part, cur_best=Inf) {
n_points_in_data_mat <- ncol(data_mat)
# partition the vector into only 2 subdivisions
if (no_part==2) {
# initial value for loop
best_lim_qual <- cur_best
best_limit = 1
startat <- 1
for (jj in startat:n_points_in_data_mat) {
# get quality of this subdivision
lim_qual <- get_part_quality(data_mat, jj, best_lim_qual)
# this subdivision is better than the previous best
if (lim_qual < best_lim_qual) {
best_lim_qual <- lim_qual # set new best subdivision
best_limit <- jj
}
} # end loop over data_mat
# return the best limitation found and its quality
return(c(best_lim_qual, best_limit))
} else {
# initial value for loop
best_lim_qual <- Inf #cur_best
bl = 1 #default: nothing better found
for (ii in 1:(n_points_in_data_mat-no_part-2)) {
# get best partitioning inside the remaining part - the returned quality value is not used
bp <- best_partition(data_mat[,ii:n_points_in_data_mat, drop=FALSE], no_part-1, best_lim_qual)
lim_qual <- bp[1]
if (lim_qual > best_lim_qual) next #no use searching any further
best_limits <- bp[2:(no_part-1)]
# with a limit at ii, the rest is best partitioned as contained in best_limits
this_i_best_limits <- c(ii, best_limits+ii-1)
# get the overall quality of this partitioning
lim_qual <- get_part_quality(data_mat, this_i_best_limits, best_lim_qual)
# this subdivision is better than the previous best
if (lim_qual < best_lim_qual) {
# set new best subdivision
best_lim_qual <- lim_qual
bl <- this_i_best_limits
}
}
# return the best limitation found and its quality
return(c(best_lim_qual, bl))
} # end if no_part==2
} # EOF
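# illustrative check (assumed toy data): split a two-level series into 2 parts
# m <- matrix(c(0.1, 0.1, 0.1, 0.9, 0.9, 0.9), nrow = 1)
# best_partition(m, 2) # c(0, 4): zero remaining variance, break before column 4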
best_partition_new <- function(data_mat, no_parts, start=NULL)
{
shift_break = function(breaks, break_no, n_points_in_data_mat)
{
no_parts=length(breaks)+1
if (breaks[break_no] < n_points_in_data_mat - (no_parts-1-break_no) ) #shifting possible?
breaks[break_no:(no_parts-1)] = breaks[break_no] + (1:(no_parts-break_no)) else
breaks=NA #end reached
return(breaks)
}
best_lim_qual=Inf
variances = array(NA, no_parts)
n_points_in_data_mat <- ncol(data_mat)
breaks = 1:(no_parts-1)
active_break = no_parts-1 #start shifting here
turnover=FALSE
if (!is.null(start)) breaks=start
while (TRUE)
{
#shift active break
breaks_new = shift_break(breaks, break_no=active_break, n_points_in_data_mat = n_points_in_data_mat )
if (is.na(breaks_new[1]))
{
if (active_break == 1) {
break #no previous break to shift, all done
} else
active_break=active_break-1 #shift previous break
turnover=TRUE #remember to switch back to the last break after the next iteration
variances[active_break:(no_parts-1)]=NA #mark variances that need updating
next
} else
breaks=breaks_new
variances[active_break]=NA #from here, new calculation of variance is necessary
if (turnover) active_break = no_parts-1 #back to shifting last break
stepup_again=0
#update variances from the first missing one onwards
start_part=min(which(is.na(variances))) #no need to update all values, some did not change
qual=sum(variances[1:(start_part-1)], na.rm=TRUE)
lim <- c(1, breaks, n_points_in_data_mat+1)
for (iii in start_part:(length(lim)-1)) {
part_start <- lim[iii] # start index of current partition
part_end <- max(part_start,lim[iii+1]-1) # end index of current partition
if (part_start==part_end) {
variances[iii]=0
next # this CAN be done because the variance of a subsection that contains only one point is zero
# it MUST be done because the var() function doesn't work as intended on one-column matrices
}
# factor for weighting the variance of this subdivision in the overall variance
wf <- (part_end-part_start)
# sum up weighted variances of subdivisions
variances[iii] = wf*sum(apply(data_mat[,part_start:part_end, drop=FALSE], MAR=1, FUN = var))
qual = qual + variances[iii]
if (qual >= best_lim_qual)
{
active_break=min(active_break, iii)
break #abort, if already worse than current best and modify this break
}
}
if (qual < best_lim_qual) { #new optimum found
# set new best subdivision
best_lim_qual <- qual
bl <- breaks
}
}
return(c(best_lim_qual, bl))
}
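# illustrative check (assumed toy data): the exhaustive break-shifting search
# should reproduce the result of best_partition() above
# m <- matrix(c(0.1, 0.1, 0.1, 0.9, 0.9, 0.9), nrow = 1)
# best_partition_new(m, 2) # c(0, 4)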
#
#
#
# system.time({b2=best_partition(vec, 5)})
#
# b2
#
# get_part_quality(vec, b2)
# get_part_quality(vec, a[-1])