Nothing
fmdsd <-
function(xf, group.name="group", gaussiand=TRUE,
distance = c("jeffreys", "hellinger", "wasserstein", "l2", "l2norm"),
windowh=NULL, data.centered=FALSE, data.scaled=FALSE, common.variance=FALSE,
add = TRUE, nb.factors=3, nb.values=10, sub.title="",
plot.eigen=TRUE, plot.score=FALSE, nscore=1:3, filename=NULL)
{
#require(e1071)
#---------------
# Preliminaries
#---------------
if (!(is.folder(xf) | is.data.frame(xf))){
stop("fmdsd applies to a data frame or an object of class 'folder'.")
}
if (is.folder(xf)) {
# Convert the data folder into a data frame
x <- as.data.frame(xf, group.name = group.name)
}
if (is.data.frame(xf)) {
# Is group.name the name of a column of x?
if (!group.name %in% names(xf))
stop(paste0("xf has no column named '", group.name, "'."))
# x will contain the data.frame
x <- xf[-which(colnames(xf) == group.name)]
# Last column: the grouping variable
x[group.name] <- xf[, group.name]
# Convert the data.frame into a folder
xf <- as.folder(xf, groups = group.name)
}
# p denotes the dimension of the data
p<-ncol(x)-1;
# # The initial data is preserved in x0
# # (if the data are centered or reduced, x will contain them)
# x0<-x
# Rename the last column of x as 'group'
last.column.name=colnames(x)[ncol(x)]
colnames(x)[ncol(x)] <- "group"
group<-as.factor(x$group);
nb.groups<-length(levels(group));
levgroup<-levels(group);
# Controls and error messages
# on data
if (!all(apply(as.data.frame(x[,1:p], stringsAsFactors = TRUE), 2, is.numeric)))
stop("The variables must be numeric!")
if (any(is.na(x)))
stop("There are NAs in the folder")
# on the window or window parameter
if (!is.null(windowh))
{if (is.numeric(windowh))
{if (length(windowh) > 1)
{stop("windowh must be either a numeric value, or a list of matrix")
}
if (windowh < .Machine$double.eps)
{stop("windowh must be strictly positive!")
}
} else
{if (is.list(windowh))
{if (is.null(names(windowh)))
{stop("the elements of the windowh list must be named")
} else
{if (min(names(windowh)==levgroup)<1)
{stop("the names of the windowh list must be the group names")
}
}
if (p >1)
{if (min(unlist(lapply(windowh, det))) < .Machine$double.eps)
{stop("All elements of windowh must be positive matrices!")
}
} else
{if (min(unlist(windowh)) < .Machine$double.eps)
{stop("All elements of windowh must be strictly positive!")
}
}
}
}
}
# Only distances available: "l2" (L^2 distance), "l2norm" (L^2 distance between normalized densities),
# "hellinger" (Hellinger/Matusita distance), "jeffreys" (symmetric Kullback-Leibler divergence)
# or "wasserstein" (Wasserstein distance).
distance <- match.arg(distance)
if ( (!gaussiand) & (distance %in% c("hellinger", "jeffreys", "wasserstein")) ) {
warning(paste0("distance=", distance, " is not avalaible for non-Gaussian densities.\nSo the argument distance is set to 'l2'."))
distance <- "l2"
}
if (gaussiand & !is.null(windowh)) {
warning("The argument windowh is not taken into account for Gaussian densities.")
}
# For now, the only choice of kernel is the Gaussian kernel.
if (!gaussiand)
{kern = "gauss"} else
kern = ""
# Control and error message
if (min(table(group)) <= 1)
stop("There should be more than one observation in each group")
# Control of nb.factors and nb.values
if (nb.groups <= nb.values)
{nb.values <- nb.groups - 1
}
if (nb.groups <= nb.factors)
{nb.factors <- nb.groups - 1
}
# Mean per group
moyL<-by(as.data.frame(x[,1:p], stringsAsFactors = TRUE),INDICES=group,FUN=colMeans);
moyL0<-moyL
if(data.centered)
{# Centering data
for (i in 1:nb.groups)
{moyL[[i]]<-numeric(p)
x[x$group==levgroup[i],1:p]=scale(x[x$group==levgroup[i],1:p],scale=F)
}
}
# Variance per group
varL<-by(as.data.frame(x[,1:p], stringsAsFactors = TRUE),INDICES=group,FUN=var);
varL0<-varL
# Correlation matrix or correlation coefficient per group
corL=varL
if (p>1)
{corL<-by(as.data.frame(x[,1:p], stringsAsFactors = TRUE),INDICES=group,FUN=cor);
} else
{for (i in 1:nb.groups)
{corL[[i]]<-1
}
}
if(data.scaled)
{# scaling data
varL<-corL
for (i in 1:nb.groups)
{x[x$group==levgroup[i],1:p]=scale(x[x$group==levgroup[i],1:p])
}
}
if(common.variance)
{for (i in 1:nb.groups)
{varL[[i]]<-var(x[,1:p])
}
}
# Skewness and kurtosis coefficients per group
#require(e1071)
skewnessL <- by(as.data.frame(x[, 1:p], stringsAsFactors = TRUE), INDICES=group, FUN=apply, 2, skewness)
kurtosisL <- by(as.data.frame(x[, 1:p], stringsAsFactors = TRUE), INDICES=group, FUN=apply, 2, kurtosis)
# Distance: string which will be returned
dist.print <- distance
if (gaussiand) { # Gaussian case
# If distance == "l2norm" (i.e. L^2-distance between normed densities) then:
# - dist = "l2"
# - normed = TRUE
if (distance == "l2norm") {
distance <- "l2"
normed <- TRUE
} else {
normed <- FALSE
}
switch(distance,
"l2" = {
W = matipl2dpar(moyL, varL)
norme<-vector("numeric",nb.groups);
for (i in 1:nb.groups) {
norme[i]<-sqrt(W[i,i])
}
if(normed) {
# Calculus of the matrix W of the normed pca
for (i in 1:nb.groups) {
for (j in 1:i) {
W[i,j]<-W[i,j]/(norme[i]*norme[j])
}
}
for (i in 1:(nb.groups-1)) {
for (j in (i+1):nb.groups) {
W[i,j]<-W[j,i]
}
}
}
matdist <- diag(0, nb.groups, nb.groups)
dimnames(matdist) <- list(levels(group), levels(group))
if (!normed) {
for (i in 2:nb.groups) for (j in 1:(i-1)) {
matdist[i, j] <- matdist[j, i] <- sqrt(W[i, i] + W[j, j] - 2*W[i, j])
}
} else {
for (i in 2:nb.groups) for (j in 1:(i-1)) {
matdist[i, j] <- matdist[j, i] <- sqrt(2 - 2*W[i, j])
}
}
matdist <- as.dist(matdist)
},
"jeffreys" = {
matdist <- matjeffreys(xf)
},
"hellinger" = {
matdist <- mathellinger(xf)
},
"wasserstein" = {
matdist <- matwasserstein(xf)
}
)
} else { # non Gaussian case: Gaussian kernel method
# kern <- "gauss"
# If distance == "l2norm" (i.e. L^2-distance between normed densities) then:
# - dist = "l2"
# - normed = TRUE
if (distance == "l2norm") {
distance <- "l2"
normed <- TRUE
} else if (distance == "l2") {
normed <- FALSE
}
# For now, only L^2-distance is available.
# switch(distance,
# "l2" = {
#---------------
# Calculus of the inner products matrix W
#---------------
choix = character(3)
# Choice of the dimension
# "m" : multivariate ; "u" : univariate
if (p > 1) {
choix[1] = "m"
} else{
choix[1] = "u"
}
# Choice of the kernel
# "g" : gaussian kernel; "." : not applicable
# This option offers a limited choice as the only available kernel is the
# gaussian kernel
if (kern == "gauss") {
choix[2] = "g"
} else {
choix[2] = "."
}
# Choice of the window
# The window is given by the user in the "windowh" parameter as
# "l" : list of (definite positive) matrices
# "n" : positive number (common to all densities) with which the variance
# matrices are multiplied
# "a" : NULL, that is the matrice variance of each density is multiplied by the
# AMISE window (internally computed by the "" function).
# "." : not applicable
if (is.null(windowh)) {
choix[3] = "a"
} else {
if (p == 1) {
if (length(windowh) == 1) {
choix[3] = "n"
} else {
choix[3] = "l"
}
} else {
if (is.list(windowh)) {
choix[3] = "l"
} else {
choix[3] = "n"
}
}
}
choix = paste(choix, collapse = "")
# Calculus of the inner products between densities
switch(choix,
# Case: multivariate, non Gaussian distribution, density estimated using
# Gaussian kernel and AMISE window
mga = {
nbL<-by(x[,1:p],INDICES=group,FUN=nrow);
wL<-bandwidth.parameter(p,nbL)
# Multiplication of the variance by the window parameter
varLwL<-varL
for (i in 1:nb.groups) {
varLwL[[i]]<-varL[[i]]*(wL[[i]]^2)
}
W = matipl2d(xf, method = "kern", varLwL)
},
# Case univariate, non gaussian distributions estimated by gaussian kernel
# method, and AMISE windows
uga = {
nbL<-by(as.data.frame(x[,1:p], stringsAsFactors = TRUE),INDICES=group,FUN=NROW);
wL<-bandwidth.parameter(p,nbL)
# Multiplication of the variance by the window parameter
varLwL<-varL
for (i in 1:nb.groups) {
varLwL[[i]]<-varL[[i]]*(wL[[i]]^2)
}
W = matipl2d(xf, method = "kern", varLwL)
},
# Case: multivariate, non gaussian distributions estimed by gaussian kernel
# method, and bandwith parameter, common to all densities, given by the user
mgn = {
nbL<-by(x[,1:p],INDICES=group,FUN=nrow);
# Multiplication of the variance by the window parameter
varLwL<-varL
for (i in 1:nb.groups) {
varLwL[[i]]<-varL[[i]]*(windowh^2)
}
W = matipl2d(xf, method = "kern", varLwL)
},
# Case univariate, non gaussian distributions estimed by gaussian kernel
# method, and bandwith parameter, common to all densities, given by the user
ugn = {
nbL<-by(as.data.frame(x[,1:p], stringsAsFactors = TRUE),INDICES=group,FUN=NROW);
# Multiplication of the variance by the window
varLwL<-varL
for (i in 1:nb.groups) {
varLwL[[i]]<-varL[[i]]*(windowh^2)
}
W = matipl2d(xf, method = "kern", varLwL)
},
# Case: multivariate, non gaussian distributions estimated by gaussian kernel
# method, and windows given as a list of matrices
mgl = {
W = matipl2d(xf, method = "kern", windowh)
},
# Case univariate, non gaussian distributions estimated by gaussian kernel
# method, and windows given as a list of numbers
ugl = {
W = matipl2d(xf, method = "kern", windowh)
}
)
norme<-vector("numeric",nb.groups);
for (i in 1:nb.groups) {
norme[i]<-sqrt(W[i,i])
}
if(normed) {
# Calculus of the matrix W of the normed pca
for (i in 1:nb.groups) {
for (j in 1:i) {
W[i,j]<-W[i,j]/(norme[i]*norme[j])
}
}
for (i in 1:(nb.groups-1)) {
for (j in (i+1):nb.groups) {
W[i,j]<-W[j,i]
}
}
}
matdist <- diag(0, nb.groups, nb.groups)
dimnames(matdist) <- list(levels(group), levels(group))
if (!normed) {
for (i in 2:nb.groups) for (j in 1:(i-1)) {
matdist[i, j] <- matdist[j, i] <- sqrt(W[i, i] + W[j, j] - 2*W[i, j])
}
} else {
for (i in 2:nb.groups) for (j in 1:(i-1)) {
matdist[i, j] <- matdist[j, i] <- sqrt(2 - 2*W[i, j])
}
}
matdist <- as.dist(matdist)
}
# )
# }
# add.cst <- FALSE
cmds <- cmdscale(matdist, k = nb.factors, eig = TRUE, add = add, x.ret = TRUE)
# Eigenvalues to display
ep <- cmds$eig
epaff <- ep[1:nb.values]
# Scores to display
coor <- cmds$points
# Change of the names of the grouping variable in the returned results
names(attributes(moyL)$dimnames) <- last.column.name
names(attributes(varL)$dimnames) <- last.column.name
names(attributes(corL)$dimnames) <- last.column.name
names(attributes(skewnessL)$dimnames) <- last.column.name
names(attributes(kurtosisL)$dimnames) <- last.column.name
# Creation of the list of results in an object of class 'fmdsd' :
results <- list( call=match.call(),
group=last.column.name,
variables=colnames(x)[1:p],
d=dist.print,
inertia=data.frame(eigenvalue=epaff,
inertia=round(1000*epaff/sum(abs(ep)))/10, stringsAsFactors = TRUE),
scores=data.frame(levgroup,PC=coor, stringsAsFactors = TRUE))
# Change of the name of the grouping variable in the data frame of scores
colnames(results$scores)[1]=last.column.name
# Outputting the moments
results <- c(results, list(means=moyL,
variances=varL,
correlations=corL,
skewness=skewnessL,
kurtosis=kurtosisL))
class(results) <- "fmdsd"
# Save this list in a file (if a filename is given)
if (! is.null(filename))
{save(results,file = filename)
}
# Barplot of the inertia
if (plot.eigen)
{inertia=results$inertia$inertia
barplot(inertia, main="Inertia",
names.arg=1:length(inertia),
cex.names=1)
}
# Plotting the scores on the principal planes (1,2),(1,3) et (2,3)
# Plane (a,b)
if (plot.score)
{plot(results, nscore=nscore, sub.title=sub.title)
}
# Returning the list of results
return(results)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.