Nothing
# Chunk options used for every code chunk of the document.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 6,
  fig.height = 5
)

# Attach the package used throughout the rest of the script.
library(RSDA)
# Install RSDA (with its dependencies) through install.packages() ...
install.packages("RSDA", dependencies = TRUE)

# ... and from the PROMiDAT/RSDA GitHub repository through devtools.
devtools::install_github("PROMiDAT/RSDA")
# Read a symbolic table from a ';'-separated CSV file; the first column
# supplies the row names.
ex3 <- read.sym.table(
  file = "tsym1.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  row.names = 1
)
ex3

# Write the same table back out to a CSV file, keeping row and column names.
write.sym.table(
  ex3,
  file = "tsymtemp.csv",
  sep = ";",
  dec = ".",
  row.names = TRUE,
  col.names = TRUE
)
# Load the example3 dataset and print it.
data(example3)
example3

# Subsetting: one row, one column, a row range of one column, and a
# column selected by name.
example3[2, ]
example3[, 3]
example3[2:3, 5]
example3$F1
# Load the ex1_db2so dataset and print it.
data(ex1_db2so)
ex1_db2so
# The classic.to.sym function converts a traditional table into a symbolic
# one. To do this we must indicate the following parameters:
#
#   x                   = a data.frame
#   concept             = variables to be used as a concept
#   variables           = variables to be used, compatible with tidyselect
#                         options
#   default.numeric     = function that will be used by default for numerical
#                         values (sym.interval)
#   default.categorical = function that will be used by default for
#                         categorical values (sym.model)
result <- classic.to.sym(
  x = ex1_db2so,
  concept = c(state, sex),
  variables = c(county, group, age)
)
result
We can add new variables indicating the type we want them to be.
# Same conversion, with an extra variable age_hist built by sym.histogram();
# its breaks come from pretty() applied to the age column.
result <- classic.to.sym(
  x = ex1_db2so,
  concept = c("state", "sex"),
  variables = c(county, group, age),
  age_hist = sym.histogram(age, breaks = pretty(ex1_db2so$age, 5))
)
result
# Load USCrime and preview its first rows.
data(USCrime)
head(USCrime)

# Build a symbolic table with state as the concept, three selected
# variables, and an additional histogram variable for ViolentCrimesPerPop.
result <- classic.to.sym(
  x = USCrime,
  concept = state,
  variables = c(NumInShelters, NumImmig, ViolentCrimesPerPop),
  ViolentCrimesPerPop_hist = sym.histogram(
    ViolentCrimesPerPop,
    breaks = pretty(USCrime$ViolentCrimesPerPop, 5)
  )
)
result
# Load ex_mcfa1 and preview its first rows.
data("ex_mcfa1")
head(ex_mcfa1)

# Convert with suspect as the concept and sym.set as the default function
# for categorical variables, using three explicitly selected variables.
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  variables = c(hair, eyes, region),
  default.categorical = sym.set
)
sym.table
We can modify the function that will be applied by default to the categorical variables
# Same conversion without an explicit `variables` selection; sym.set is
# passed as the default function for categorical variables.
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  default.categorical = sym.set
)
sym.table
# Import the file "hani3101.sds" with SDS.to.RSDA() and print the result.
hani3101 <- SDS.to.RSDA(file.path = "hani3101.sds")
hani3101

# We can save the file in CSV to RSDA format as follows.
# (The call below was previously fused onto the comment line and therefore
# never ran; it now mirrors the abalone export that follows.)
write.sym.table(
  hani3101,
  file = "hani3101.csv",
  sep = ";",
  dec = ".",
  row.names = TRUE,
  col.names = TRUE
)
# Import the file "abalone.xml" with SODAS.to.RSDA() and print the result.
abalone <- SODAS.to.RSDA("abalone.xml")
abalone

# Export the imported table to a ';'-separated CSV file.
write.sym.table(
  abalone,
  file = "abalone.csv",
  sep = ";",
  dec = ".",
  row.names = TRUE,
  col.names = TRUE
)
data(example3)

# Means: each column is addressed once by name and once by position.
mean(example3$F1)
mean(example3[, 1])

mean(example3$F2)
mean(example3[, 2])

mean(example3$F2, method = "interval")
mean(example3[, 2], method = "interval")

# Medians, with the same name/position pairing.
median(example3$F1)
median(example3[, 1])

median(example3$F2)
median(example3[, 2])

median(example3$F6, method = "interval")
median(example3[, 6], method = "interval")
# Variance of several example3 columns, including the "interval" and
# "billard" methods on F6.
var(example3[, 1])
var(example3[, 2])
var(example3$F6)
var(example3$F6, method = "interval")
var(example3$F6, method = "billard")

# Standard deviation with the same set of method choices.
sd(example3$F1)
sd(example3$F2)
sd(example3$F6)
sd(example3$F6, method = "interval")
sd(example3$F6, method = "billard")
# Correlation between F1 and F4, addressed by name and by position.
cor(example3$F1, example3$F4)
cor(example3[, 1], example3[, 4])

# Correlation between F2 and F6 with the "centers" and "billard" methods.
cor(example3$F2, example3$F6, method = "centers")
cor(example3$F2, example3$F6, method = "billard")
library(ggpolypath)

# Load oils and pass it through the package-internal converters
# to.v2() and to.v3() (accessed with `:::`).
data(oils)
oils <- RSDA:::to.v3(RSDA:::to.v2(oils))

# Radar plots for two different row selections.
sym.radar.plot(oils[2:3, ])
sym.radar.plot(oils[2:5, ])

# Interval histogram of column 2 with four bins.
res <- interval.histogram.plot(
  oils[, 2],
  n.bins = 4,
  col = c(2, 3, 4, 5)
)
res

# Interval histogram of column 3 with three bins and a title.
res <- interval.histogram.plot(
  oils[, 3],
  n.bins = 3,
  main = "Histogram",
  col = c(2, 3, 4)
)
res
data("oils")

# "Gowda.Diday" distance on the first four variables, then hierarchical
# clustering and a dendrogram.
DM <- sym.dist.interval(
  sym.data = oils[, 1:4],
  method = "Gowda.Diday"
)
model <- hclust(DM)
plot(model, hang = -1)

# Same pipeline with the "Ichino" method.
DM <- sym.dist.interval(
  sym.data = oils[, 1:4],
  method = "Ichino"
)
model <- hclust(DM)
plot(model, hang = -1)

# "Hausdorff" method on variables 1, 2 and 4 with explicit tuning arguments.
DM <- sym.dist.interval(
  sym.data = oils[, c(1, 2, 4)],
  gamma = 0.5,
  method = "Hausdorff",
  normalize = FALSE,
  SpanNormalize = TRUE,
  euclidea = TRUE,
  q = 2
)
model <- hclust(DM)
plot(model, hang = -1)
data(int_prost_train)
data(int_prost_test)

# Fit lpsa against all other variables with method "cm".
res.cm <- sym.lm(
  formula = lpsa ~ .,
  sym.data = int_prost_train,
  method = "cm"
)
res.cm

# Predict on the test set.
pred.cm <- sym.predict(model = res.cm, new.sym.data = int_prost_test)

# Error and fit measures comparing the test response with the fitted values.
RMSE.L(int_prost_test$lpsa, pred.cm$Fitted)
RMSE.U(int_prost_test$lpsa, pred.cm$Fitted)
R2.L(int_prost_test$lpsa, pred.cm$Fitted)
R2.U(int_prost_test$lpsa, pred.cm$Fitted)
deter.coefficient(int_prost_test$lpsa, pred.cm$Fitted)
data(int_prost_train)
data(int_prost_test)

# Fit with sym.glm() using alpha = 1 and 10 folds; the response is
# column 9 of the training data.
res.cm.lasso <- sym.glm(
  sym.data = int_prost_train,
  response = 9,
  method = "cm",
  alpha = 1,
  nfolds = 10,
  grouped = TRUE
)

# Predict on the test set with the fitted model.
pred.cm.lasso <- sym.predict(
  res.cm.lasso,
  response = 9,
  int_prost_test,
  method = "cm"
)

# Plot the fitted object and its glmnet.fit component against lambda.
plot(res.cm.lasso)
plot(res.cm.lasso$glmnet.fit, "lambda", label = TRUE)

# Error and fit measures on the test response.
RMSE.L(int_prost_test$lpsa, pred.cm.lasso)
RMSE.U(int_prost_test$lpsa, pred.cm.lasso)
R2.L(int_prost_test$lpsa, pred.cm.lasso)
R2.U(int_prost_test$lpsa, pred.cm.lasso)
deter.coefficient(int_prost_test$lpsa, pred.cm.lasso)
data(int_prost_train)
data(int_prost_test)

# Fit with sym.glm() using alpha = 0 and 10 folds; the response is
# column 9 of the training data.
res.cm.ridge <- sym.glm(
  sym.data = int_prost_train,
  response = 9,
  method = "cm",
  alpha = 0,
  nfolds = 10,
  grouped = TRUE
)

# Predict on the test set with the fitted model.
pred.cm.ridge <- sym.predict(
  res.cm.ridge,
  response = 9,
  int_prost_test,
  method = "cm"
)

# Plot the fitted object and its glmnet.fit component against lambda.
plot(res.cm.ridge)
plot(res.cm.ridge$glmnet.fit, "lambda", label = TRUE)

# Error and fit measures on the test response.
RMSE.L(int_prost_test$lpsa, pred.cm.ridge)
RMSE.U(int_prost_test$lpsa, pred.cm.ridge)
R2.L(int_prost_test$lpsa, pred.cm.ridge)
R2.U(int_prost_test$lpsa, pred.cm.ridge)
deter.coefficient(int_prost_test$lpsa, pred.cm.ridge)
data("oils")

# sym.pca() with the "centers" option: individuals and variables plots.
res <- sym.pca(oils, "centers")
plot(res, choix = "ind")
plot(res, choix = "var")

# "tops" option: individuals plot only.
res <- sym.pca(oils, "tops")
plot(res, choix = "ind")

# "principal.curves" option: individuals plot only.
res <- sym.pca(oils, "principal.curves")
plot(res, choix = "ind")

# "optimized.distance" option: individuals and variables plots.
res <- sym.pca(oils, "optimized.distance")
plot(res, choix = "ind")
plot(res, choix = "var")

# "optimized.variance" option: individuals and variables plots.
res <- sym.pca(oils, "optimized.variance")
plot(res, choix = "ind")
plot(res, choix = "var")
# Load ex_mcfa1 and print it.
data("ex_mcfa1")
ex_mcfa1

# Symbolic table with suspect as the concept and sym.set for categorical
# variables.
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  default.categorical = sym.set
)
sym.table

# sym.mcfa() on variables 2 and 3, followed by a scatterplot of result
# columns 2 and 3.
res <- sym.mcfa(sym.table, c(2, 3))
mcfa.scatterplot(
  res[, 2],
  res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3)
)

# Same analysis on variables 2, 3 and 4.
res <- sym.mcfa(sym.table, c(2, 3, 4))
mcfa.scatterplot(
  res[, 2],
  res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3, 4)
)
# sym.umap() on the oils data: print the input, the result, then plot it.
datos <- oils
datos

x <- sym.umap(datos)
x

plot(x)

# Same steps on the Cardiological data.
datos <- Cardiological
datos

x <- sym.umap(datos)
x

plot(x)
# Apply interval.length() to the oils data.
data(oils)
datos <- oils
interval.length(datos)
# Load hardwoodBrito and keep its column and row names for later use.
data("hardwoodBrito")
Hardwood.histogram <- hardwoodBrito
Hardwood.cols <- colnames(Hardwood.histogram)
Hardwood.names <- row.names(Hardwood.histogram)

# Print the whole table and its first cell.
Hardwood.histogram
Hardwood.histogram[[1]][[1]]

weighted.center <- weighted.center.Hist.RSDA(Hardwood.histogram)

# Matrix filled with the value 3, one row per entry of Hardwood.names and
# one column per entry of Hardwood.cols.
BIN.Matrix <- matrix(
  rep(3, length(Hardwood.cols) * length(Hardwood.names)),
  nrow = length(Hardwood.names)
)

# Histogram PCA; print two components of the result.
pca.hist <- sym.histogram.pca(Hardwood.histogram, BIN.Matrix)
pca.hist$classic.PCA
pca.hist$sym.hist.matrix.PCA
# Plot along PCA axis 1 for the single concept "ACER".
ACER.p1 <- Sym.PCA.Hist.PCA.k.plot(
  data.sym.df = pca.hist$Bins.df,
  title.graph = " ",
  concepts.name = c("ACER"),
  title.x = "First Principal Component (84.83%)",
  title.y = "Frequency",
  pca.axes = 1
)
ACER.p1

# Same plot for every distinct Object.Name found in Bins.df.
ALL.p1 <- Sym.PCA.Hist.PCA.k.plot(
  data.sym.df = pca.hist$Bins.df,
  title.graph = " ",
  concepts.name = unique(pca.hist$Bins.df$Object.Name),
  title.x = "First Principal Component (84.83%)",
  title.y = "Frequency",
  pca.axes = 1
)
ALL.p1
# Quantiles of the PCA histogram matrix (second argument: 3).
Hardwood.quantiles.PCA <- quantiles.RSDA(pca.hist$sym.hist.matrix.PCA, 3)

# Labels and selections for the arrow plot of the single concept "ACER".
label.name <- "Hard Wood"
Title <- "First Principal Plane"
axes.x.label <- "First Principal Component (84.83%)"
axes.y.label <- "Second Principal Component (9.70%)"
concept.names <- c("ACER")
var.names <- c("PC.1", "PC.2")

quantile.ACER.plot <- Percentil.Arrow.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
quantile.ACER.plot

# Same plot for all concepts (every row name of the quantile table).
label.name <- "Hard Wood"
Title <- "First Principal Plane"
axes.x.label <- "First Principal Component (84.83%)"
axes.y.label <- "Second Principal Component (9.70%)"
concept.names <- row.names(Hardwood.quantiles.PCA)
var.names <- c("PC.1", "PC.2")

quantile.plot <- Percentil.Arrow.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
quantile.plot
# Labels and selections for the quantile plot of the single concept "ACER".
label.name <- "Hard Wood"
Title <- "First Principal Plane"
axes.x.label <- "PC 1 (84.83%)"
axes.y.label <- "PC 2 (9.70%)"
concept.names <- c("ACER")
var.names <- c("PC.1", "PC.2")

plot.3D.HW <- sym.quantiles.PCA.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
plot.3D.HW

# Switch to all concepts and draw the combined quantile plot.
concept.names <- row.names(Hardwood.quantiles.PCA)
sym.all.quantiles.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

# Mesh variant of the combined plot, called with the same arguments.
sym.all.quantiles.mesh3D.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
# Quantiles of the PCA histogram matrix via quantiles.RSDA.KS()
# (second argument: 100).
Hardwood.quantiles.PCA.2 <- quantiles.RSDA.KS(pca.hist$sym.hist.matrix.PCA, 100)

# Take the first five entries of the first column and convert each one
# with HistRSDAToEcdf(); each result is called as a function further down.
h <- Hardwood.quantiles.PCA.2[[1]][[1]]
tmp <- HistRSDAToEcdf(h)
h2 <- Hardwood.quantiles.PCA.2[[1]][[2]]
tmp2 <- HistRSDAToEcdf(h2)
h3 <- Hardwood.quantiles.PCA.2[[1]][[3]]
tmp3 <- HistRSDAToEcdf(h3)
h4 <- Hardwood.quantiles.PCA.2[[1]][[4]]
tmp4 <- HistRSDAToEcdf(h4)
h5 <- Hardwood.quantiles.PCA.2[[1]][[5]]
tmp5 <- HistRSDAToEcdf(h5)

# Common evaluation grid: the distinct breaks of all five histograms,
# in increasing order.
breaks.unique <- unique(c(h$breaks, h2$breaks, h3$breaks, h4$breaks, h5$breaks))
tmp.unique <- breaks.unique[order(breaks.unique)]

# Evaluate every converted object on the common grid. Each tmp* name is
# overwritten by its evaluated values.
tmp <- tmp(v = tmp.unique)
tmp2 <- tmp2(v = tmp.unique)
tmp3 <- tmp3(v = tmp.unique)
tmp4 <- tmp4(v = tmp.unique)
tmp5 <- tmp5(v = tmp.unique)

# Absolute differences between the first two evaluated curves.
abs_dif <- abs(tmp2 - tmp)

# The Kolmogorov-Smirnov distance is the maximum of the absolute differences.
# (These two statements were previously fused onto the comment line and
# therefore never ran.)
distancia_ks <- max(abs_dif)
distancia_ks
library(tidyr)

# Combine the computed values into one data frame, then reshape it to long
# format with one row per (PC.1, HardWood) pair.
# (Everything below was previously fused onto the comment line and
# therefore never ran.)
df.HW <- data.frame(
  PC.1 = tmp.unique,
  ACER = tmp,
  ALNUS = tmp2,
  FRAXINUS = tmp3,
  JUGLANS = tmp4,
  QUERCUS = tmp5
) %>%
  pivot_longer(
    cols = c(ACER, ALNUS, FRAXINUS, JUGLANS, QUERCUS),
    names_to = "HardWood",
    values_to = "ecdf"
  )

# One line per HardWood level, legend placed at the bottom.
# NOTE(review): the trailing geom_line() repeats the first layer with
# default settings; it is kept so the layer stack is unchanged.
grafico_ecdf <- ggplot(
  data = df.HW,
  aes(x = PC.1, y = ecdf, color = HardWood)
) +
  geom_line(size = 1) +
  labs(
    color = "Hardwood",
    y = "Empirical Cumulative Distribution "
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 12)
  ) +
  geom_line()
grafico_ecdf
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.