Principal Components Analysis"

\normalsize

```{whites, eval=FALSE, echo = eval_rows}

Drop empty rows

rowsums <- data.frame(sapply(df,is.na)) rows_drop <- (which(rowSums(rowsums) == ncol(df))) df <- df[-rows_drop, ,drop=FALSE]

```r
indices <- which(colnames(df_code) %in% colnames(df))
cat("\\# `Selected variables`")
cat("\\newline ")
cat("`colnames_selected = ")
cat(paste0("c(", paste(indices, collapse=','), ")`"))

```{whites, eval=FALSE, echo = eval_code}

Data frame of the selected variables

df <- df[ ,colnames_selected, drop=FALSE]

```{whites, eval=FALSE, echo = indic_missings}
# Missing values imputation 
df <- imputeMissings::impute(df, method="randomForest")
cat("\\# `Scaling`")
cat("\\newline ")
cat("`scale <- ", scale, "`")

```{whites, eval=FALSE, echo = eval_code}

Define number of dimensions to keep in the results

ncp <- min(ncol(df),5)

Perform PCA

pca <- PCA(df, ncp=ncp, scale.unit = scale)

```{whites, eval=FALSE, echo = eval_code}
# Data Head (Transposed)
kable(t(head(df, n=5)), digits=3, format="simple") 

```{whites, eval=FALSE, echo = eval_code}

Pearson Correlations (Matrix)

M <- cor(df, use="complete.obs", method="pearson") corrplot(M, is.corr=TRUE, method="number", type="lower", tl.col="#396e9f", tl.cex = 0.5, cl.cex = 0.5, number.cex = 0.4, cl.align.text="l")

```{whites, eval=FALSE, echo = eval_code}
# Kendall Correlations (Matrix)
M <- cor(df, use="complete.obs", method="kendall")
corrplot(M, is.corr=TRUE, method="number", type="lower", tl.col="#396e9f", 
         tl.cex = 0.5, cl.cex = 0.5, number.cex = 0.4, cl.align.text="l")

```{whites, eval=FALSE, echo = eval_code}

Eigenvalues and Proportion Explained

eigentable <- get_eigenvalue(pca) kable(eigentable, digits=3, col.names = c("Eigenvalue","Variance Explained (%)", "Cumulative Variance Explained (%)"), format="simple")

```{whites, eval=FALSE, echo = eval_code}
# Screeplot
fviz_eig(pca, addlabels = TRUE, ylim = c(0, 50))

```{whites, eval=FALSE, echo = eval_code}

Variables

Loadings

df_svd <- scale(df, center=TRUE, scale=scale) tmp <- svd.triplet(df_svd, ncp = ncp) princomps <- tmp$V colprincomp <- c("PC1", "PC2", "PC3", "PC4", "PC5") rownames(princomps) <- colnames(df) kable(princomps, digits=3, col.names=colprincomp[1:ncp], format="simple")

```{whites, eval=FALSE, echo = eval_code}
## Standardized Loadings
correlations <- pca$var$cor[,1:ncp]
kable(correlations, digits=3, format="simple")

```{whites, eval=FALSE, echo = eval_code}

Squared Standardized Loadings

kable(pca$var$cos2, digits=3, format="simple")

```{whites, eval=FALSE, echo = eval_code}
## Standardized Loadings Plots (Dimensions 1-2)
fviz_pca_var(pca, col.var = "cos2", axes = c(1,2), 
             gradient.cols = c("#ff9900", "#2fa42d", "#396e9f"), 
             title="", ggtheme=theme_minimal(base_size = 22))

```{whites, eval=FALSE, echo = ncp3}

Standardized Loadings Plots (More Dimensions)

for (i in 1:ncp){ for (j in 1:ncp){ nam <- paste0(paste0("plot",i),j) plot <- fviz_pca_var(pca, axes = c(i,j), title="", ggtheme=theme_minimal(base_size = 8)) assign(nam, plot) } }

Plots arranged

gridExtra::grid.arrange(grobs = list(plot12,plot13,plot23), nrow=1, ncol=3, widths = unit(c(7, 7, 7), "cm"), heights = unit(7, "cm"), padding = unit(0.3, "line"))

```{whites, eval=FALSE, echo = ncp4}
## Standardized Loadings Plots (More Dimensions)
for (i in 1:ncp){
  for (j in 1:ncp){
  nam <- paste0(paste0("plot",i),j)
  plot <- fviz_pca_var(pca, axes = c(i,j), title="", 
                       ggtheme=theme_minimal(base_size = 8))
  assign(nam, plot)
  }
}

# Plots arranged
gridExtra::grid.arrange(grobs = list(plot12,plot13,plot23,plot14,plot24,plot34), 
                      nrow=2, ncol=3, widths = unit(c(6, 6, 6), "cm"), 
                      heights = unit(c(6, 6), "cm"),
                      padding = unit(0.3, "line")) 

```{whites, eval=FALSE, echo = ncp5}

Standardized Loadings Plots (More Dimensions)

for (i in 1:ncp){ for (j in 1:ncp){ nam <- paste0(paste0("plot",i),j) plot <- fviz_pca_var(pca, axes = c(i,j), title="", ggtheme=theme_minimal(base_size = 8)) assign(nam, plot) } }

Plots arranged

gridExtra::grid.arrange(grobs = list(plot12,plot13,plot23,plot14,plot24,plot34, plot15,plot25,plot35,plot45), nrow=4, ncol=3, widths = unit(c(5, 5, 5), "cm"), heights = unit(c(5, 5, 5, 5), "cm"), padding = unit(0.3, "line"))

```{whites, eval=FALSE, echo = eval_code}
## Standardized Loadings Plot (Matrix)
col_statsomat <- colorRampPalette(c("#ff9900", "#2fa42d", "#396e9f"))
corrplot(pca$var$cor, is.corr=FALSE, tl.col="#396e9f", col=col_statsomat(10), 
         tl.cex = 0.5, cl.cex = 0.5, cl.align.text="l")

```{whites, eval=FALSE, echo = eval_code}

Squared Standardized Loadings Plot (Matrix)

corrplot(pca$var$cos2, is.corr=FALSE, method="color", tl.col="#396e9f", tl.cex = 0.5, cl.cex = 0.5, cl.align.text="l")

```{whites, eval=FALSE, echo = eval_code}
# Observations
## Scores (Head)
kable(head(pca$ind$coord), digits=3, format="simple")

```{whites, eval=FALSE, echo = eval_code}

Score Plot (Dimensions 1-2)

fviz_pca_ind(pca, axes = c(1,2), title="", col.ind = "#396e9f", ggtheme=theme_minimal(base_size = 11))

```{whites, eval=FALSE, echo = ncp3}
## Score Plots (More Dimensions)
for (i in 1:ncp){
  for (j in 1:ncp){
  nam <- paste0(paste0("plot",i),j)
  plot <- fviz_pca_ind(pca, axes = c(i,j), col.ind = "#396e9f", 
                       title="", ggtheme=theme_minimal(base_size = 8))
  assign(nam, plot)
  }
}

# Plots arranged
gridExtra::grid.arrange(grobs = list(plot12,plot13,plot23), 
                      nrow=1, ncol=3, widths = unit(c(7, 7, 7), "cm"), 
                      heights = unit(7, "cm"),
                      padding = unit(0.3, "line")) 

```{whites, eval=FALSE, echo = ncp4}

Score Plots (More Dimensions)

for (i in 1:ncp){ for (j in 1:ncp){ nam <- paste0(paste0("plot",i),j) plot <- fviz_pca_ind(pca, axes = c(i,j), col.ind = "#396e9f", title="", ggtheme=theme_minimal(base_size = 8)) assign(nam, plot) } }

Plots arranged

gridExtra::grid.arrange(grobs = list(plot12,plot13,plot23,plot14,plot24,plot34), nrow=2, ncol=3, widths = unit(c(6, 6, 6), "cm"), heights = unit(c(6, 6), "cm"), padding = unit(0.3, "line"))

```{whites, eval=FALSE, echo = ncp5}
## Score Plots (More Dimensions)
for (i in 1:ncp){
  for (j in 1:ncp){
  nam <- paste0(paste0("plot",i),j)
  plot <- fviz_pca_ind(pca, axes = c(i,j), col.ind = "#396e9f", 
                       title="", ggtheme=theme_minimal(base_size = 8))
  assign(nam, plot)
  }
}

# Plots arranged
gridExtra::grid.arrange(grobs = list(plot12,plot13,plot23,plot14,plot24,plot34,
                                       plot15,plot25,plot35,plot45), 
                      nrow=4, ncol=3, widths = unit(c(5, 5, 5), "cm"), 
                      heights = unit(c(5, 5, 5, 5), "cm"),
                      padding = unit(0.3, "line")) 

```{whites, eval=FALSE, echo = eval_code}

Biplot Dimensions 1-2

fviz_pca_biplot(pca, col.var = "#ff9900", col.ind = "#396e9f", title="")

```{whites, eval=FALSE, echo = eval_code}
# Interpretation by the FactoInvestigate package (opens in the browser)
Investigate(pca, document="html_document", 
            keepRmd=FALSE, openFile=TRUE, out.selec=FALSE)
cat("\n# References", fill=TRUE)


Try the Statsomat package in your browser

Any scripts or data that you put into this service are public.

Statsomat documentation built on Nov. 17, 2021, 5:17 p.m.