DataDensityEstimationByIntBin <- function(Data) {
  # Estimate data density using integer binning: every case is mapped to a bin
  # value by IntegerBinning() and its density is the number of cases that share
  # that bin value.
  #
  # INPUT
  # Data(1:n,1:d)          data, handed unchanged to IntegerBinning()
  #
  # OUTPUT (list)
  # DensityEsimation(1:n)  for every case, the number of cases with the same
  #                        bin value
  # AnzPerUniqCV           number of cases per entry of UniqueCV, same order
  # UniqueCV               unique(CaseIntegerValues), order of first appearance
  # CaseIntegerValues      result of IntegerBinning(Data)
  #
  # NA / NaN bin values form their own group and are counted like any other
  # value.

  # Assign a bin value to every case.
  # NOTE(review): IntegerBinning is defined outside this block; it is assumed
  # to return one value per case - confirm against its definition.
  CaseIntegerValues <- IntegerBinning(Data)

  # Unique bin values plus, for every case, the position of its bin value in
  # UniqueCV, so that CaseIntegerValues == UniqueCV[IndUniq2All].
  # Example: A = c(9, 2, 9, 5); unique(A)[match(A, unique(A))] gives A again.
  UniqueCV <- unique(CaseIntegerValues, fromLast = FALSE)
  IndUniq2All <- match(CaseIntegerValues, UniqueCV)

  # Count the cases per unique value directly from the index. Unlike table(),
  # which drops NA/NaN by default, this keeps missing bin values as a counted
  # group and needs no detour over character row names.
  AnzPerUniqCV <- tabulate(IndUniq2All, nbins = length(UniqueCV))

  # Carry the per-group counts back to every single case.
  DensityEsimation <- AnzPerUniqCV[IndUniq2All]

  list(
    DensityEsimation = DensityEsimation,
    AnzPerUniqCV = AnzPerUniqCV,
    UniqueCV = UniqueCV,
    CaseIntegerValues = CaseIntegerValues
  )
}
internpiechart <- function(Datavector, Names, Labels, MaxNumberOfSlices, col) {
  # internpiechart(Datavector, Names, Labels, MaxNumberOfSlices, col)
  #
  # Internal helper: counts how often every entry of Names occurs in
  # Datavector and derives percentages, labels and colours of the resulting
  # slices. The function itself does not draw anything.
  #
  # INPUT
  # Datavector[1:n]    vector of n non-unique values
  # Names[1:m]         values to search for in Datavector; if missing, the
  #                    sorted unique values of Datavector are used
  # Labels[1:k]        labels for Names; if missing or of a different length
  #                    than Names, as.character(Names) is used
  # MaxNumberOfSlices  number >= 1: at most that many slices are kept, the rest
  #                    is merged into a slice called 'Other';
  #                    TRUE: group A of ABCanalysis is kept, the rest is merged;
  #                    FALSE or missing: set to the number of labels k
  # col                colours, one per returned slice; if missing or of the
  #                    wrong length DataVisualizations::DefaultColorSequence
  #                    is used
  #
  # OUTPUT (list)
  # Percents  named numeric vector, share of each slice in percent (2 digits)
  # Labels    character vector "<label>: <percent>%"
  # Names     the labels without the percent suffix
  # Cols      colours of the slices
  # Count     number of occurrences per name (not reduced by the slice limit)

  # Missing values become their own category; assigning the string turns a
  # numeric vector into a character vector.
  nas <- which(is.na(Datavector))
  if (length(nas) > 0) {
    Datavector[nas] <- 'Missing (NA)'
    print('Note: NA values found.')
  }
  # Only reached with a still numeric vector, i.e. one without NA/NaN.
  if (is.numeric(Datavector)) {
    nas <- which(!is.finite(Datavector))
    if (length(nas) > 0) {
      Datavector <- as.character(Datavector)
      Datavector[nas] <- 'NaN'
      print('Note: Infinitive and/or NaN values found.')
    }
  }

  if (missing(Names)) {
    Names <- sort(unique(Datavector), na.last = TRUE)
  }
  m <- length(Names)
  if (missing(Labels)) {
    Labels <- as.character(Names)
  }
  k <- length(Labels)

  # Normalise MaxNumberOfSlices to either TRUE or a single number >= 1.
  if (missing(MaxNumberOfSlices)) {
    MaxNumberOfSlices <- k
  } else {
    if (identical(FALSE, MaxNumberOfSlices)) {
      MaxNumberOfSlices <- k
    }
    if (!isTRUE(MaxNumberOfSlices) && !is.numeric(MaxNumberOfSlices)) {
      warning('MaxNumberOfSlices could not be interpreted because it is not numeric or TRUE, using MaxNumberOfSlices=TRUE.')
      MaxNumberOfSlices <- TRUE
    }
    if (length(MaxNumberOfSlices) != 1) {
      MaxNumberOfSlices <- TRUE
      warning('MaxNumberOfSlices could not be interpreted because it is a of length unequal 1, using MaxNumberOfSlices=TRUE.')
    }
    # A numeric NA would otherwise abort the comparisons below.
    if (is.na(MaxNumberOfSlices)) {
      MaxNumberOfSlices <- TRUE
      warning('MaxNumberOfSlices is NA, using MaxNumberOfSlices=TRUE.')
    }
    if (MaxNumberOfSlices < 1) {
      MaxNumberOfSlices <- TRUE
      warning('MaxNumberOfSlices value below 1, using MaxNumberOfSlices=TRUE.')
    }
    # NOTE(review): the warning announces a cap at k, but the value is left
    # untouched here. Capping would change which inputs reach the slice
    # reduction below, so the behaviour is kept as is - confirm the intent.
    if (MaxNumberOfSlices > k) {
      warning(paste('Setting MaxNumberOfSlices at number of unique values', k, 'because its too high.'))
    }
  }

  if (m != k) {
    warning('Length if Names does not equal length of Labels, using names as Labels')
    Labels <- as.character(Names)
  }

  # Values that occur in the data but not in Names are pooled as 'other'.
  tmp <- setdiff(unique(Datavector), Names)
  if (length(tmp) > 0) {
    print('These Names where additionally in the Datavector;')
    print(tmp)
    Datavector[Datavector %in% tmp] <- 'other'
    Names <- c(Names, 'other')
    Labels <- c(Labels, 'other')
    m <- m + 1
  }

  # Occurrences per name; seq_len() keeps this empty when there are no names.
  count <- vapply(
    seq_len(m),
    function(i) sum(Datavector == Names[i], na.rm = TRUE),
    integer(1)
  )

  # Names without any occurrence are dropped, unless nothing was found at all.
  indmissing <- which(count == 0)
  if (length(indmissing) > 0) {
    warning(paste0(length(indmissing), ' Names could not be found and will not be shown in the fan plot.'))
    ind <- which(count > 0)
    if (length(ind) > 0) {
      count <- count[ind]
      Labels <- Labels[ind]
      Names <- Names[ind]
    }
  }
  names(count) <- Names
  pct <- round((count / sum(count)) * 100, 2)

  # Too many slices: keep the most important ones and merge the remainder
  # into a final slice 'Other'.
  if (length(pct) > MaxNumberOfSlices) {
    if (!requireNamespace('ABCanalysis', quietly = TRUE)) {
      stop('Package ABCanalysis is required to reduce the number of slices but is not installed.')
    }
    abc <- ABCanalysis::ABCanalysis(pct)
    if (isTRUE(MaxNumberOfSlices)) {
      # No explicit limit: group A is shown, groups B and C are merged.
      pct2 <- pct[abc$Aind]
      uuu <- unique(names(pct2))
      pct <- c(pct2, (sum(pct[abc$Bind]) + sum(pct[abc$Cind])))
    } else if (length(abc$Aind) < MaxNumberOfSlices) {
      # Group A is too small: ignore the ABC groups and take the highest values.
      print('Not enough slices in group A of ABCanalysis comparing to MaxNumberOfSlices. Adding slices of highest percentage following after group A.')
      pct2 <- sort(pct, decreasing = TRUE, na.last = TRUE)
      tempind <- seq(from = (MaxNumberOfSlices + 1), to = length(pct), by = 1)
      pct_tmp <- sum(pct2[tempind])
      pct2 <- pct2[seq(from = 1, to = MaxNumberOfSlices, by = 1)]
      uuu <- unique(names(pct2))
      pct <- c(pct2, pct_tmp)
    } else if (length(abc$Aind) == MaxNumberOfSlices) {
      # Group A has exactly the requested size.
      pct2 <- pct[abc$Aind]
      uuu <- unique(names(pct2))
      pct <- c(pct2, (sum(pct[abc$Bind]) + sum(pct[abc$Cind])))
    } else {
      # Group A is too large: keep only its most frequent members.
      pct2 <- pct[abc$Aind]
      print('Too many slices in group A of ABCanalysis comparing to MaxNumberOfSlices. Selecting the most frequent subgoup out of group A.')
      pct2 <- sort(pct2, decreasing = TRUE)
      pct_tmp <- sum(pct2[seq(from = MaxNumberOfSlices + 1, to = length(pct2), by = 1)], na.rm = TRUE)
      pct2 <- pct2[seq(from = 1, to = MaxNumberOfSlices, by = 1)]
      uuu <- unique(names(pct2))
      pct <- c(pct2, (sum(pct[abc$Bind]) + sum(pct[abc$Cind]) + pct_tmp))
    }
    Labels <- c(Labels[match(uuu, Names)], 'Other')
  }

  LabelsOut <- Labels
  knew <- length(Labels)
  indother <- which(Labels == 'Other')
  if (length(indother) == 1) {
    names(pct)[indother] <- 'Other'
  }

  # paste0() would return one element for empty input, so guard that case.
  if (knew == 0 && length(pct) == 0) {
    Labels <- character(0)
  } else {
    Labels <- paste0(Labels, ': ', pct, '%')
  }
  # Shares that round to zero are shown as '<0.01%'. The label is rebuilt from
  # its name instead of pattern-replaced, so a name that itself contains '0%'
  # stays intact.
  inds <- which(pct == 0)
  if (length(inds) > 0) {
    Labels[inds] <- paste0(LabelsOut[inds], ': <0.01%')
  }

  if (missing(col)) {
    colors <- DataVisualizations::DefaultColorSequence[seq_len(knew)]
  } else if (length(col) == knew) {
    colors <- col
  } else {
    warning('Length of colors doesnt match found names defined as labels.')
    colors <- DataVisualizations::DefaultColorSequence[seq_len(knew)]
  }

  list(Percents = pct, Labels = Labels, Names = LabelsOut, Cols = colors, Count = count)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.