library(tidyverse) library(cluster) library(plotly) library(fpc) library(dendextend) library(factoextra) library(FactoMineR) library(NbClust) library(caret) library(DMwR) library(ggraph)
data_cluster <- read_csv("/Users/gabrielburcea/rprojects/stats_data_whole/data_categ_no_sev.csv") level_key_temperature <- c("Yes" = "37.5-38", "Yes" = "38.1-39", "Yes" = "38.2-39", "Yes" = "39.1-41") data_select <- data_cluster %>% dplyr::select(ID, covid_tested, chills, cough, diarrhoea, fatigue, headache, loss_smell_taste, muscle_ache, nasal_congestion, nausea_vomiting, shortness_breath, sore_throat, sputum, temperature, asthma, diabetes_type_one, diabetes_type_two, obesity, hypertension, heart_disease, lung_condition, liver_disease, kidney_disease) %>% dplyr::filter(covid_tested != "none") covid_tested_levels <- c("positive" = "showing symptoms") data_transf <- data_select %>% dplyr::mutate(covid_tested = forcats::fct_recode(covid_tested, !!!covid_tested_levels), temperature = forcats::fct_recode(temperature, !!!level_key_temperature)) %>% dplyr::filter(covid_tested != "none") data_transf$temperature <- as.character(data_transf$temperature) data_transf$ID <- NULL data_transf$covid_tested <- NULL level_key_comorbidities <- c("kidney disease" = "kidney_disease", "lung condition" = "lung_condition", "diabetes type one" = "diabetes_type_one", "diabetes type two" = "diabetes_type_two", "liver disease" = "liver_disease", "heart disease" = "heart_disease") data_pi<- data_transf %>% tidyr::replace_na(list(nausea_vomiting = 0)) %>% dplyr::rename('loss of smell and taste' = loss_smell_taste, 'muscle ache' = muscle_ache, 'nasal congestion'= nasal_congestion, 'nausea and vomiting' = nausea_vomiting, 'shortness of breath' = shortness_breath, 'sore throat' = sore_throat, "kidney disease" = kidney_disease, "diabetes type one" = diabetes_type_one, "diabetes type two" = diabetes_type_two, "liver disease" = liver_disease, "heart disease" = heart_disease) # data_piv <- data_transf %>% # pivot_longer(cols = 14:22, # names_to = "Comorbidities", # values_to = "Bolean") %>% # dplyr::filter(Bolean == "Yes") %>% # data_piv <- data_piv %>% # pivot_wider(names_from = Comorbidities, values_from = Freq) %>% # pivot_longer(cols = 3:15, # names_to = "Symptoms", # values_to = "Yes_No") %>% # dplyr::group_by(Comorbidities, Symptoms, Yes_No) %>% # dplyr::summarise(Count = n()) %>% # dplyr::mutate(Freq = Count / sum(Count)*100) %>% # dplyr::filter(Yes_No == "Yes") # data_piv$Bolean <- NULL # # data_piv$Comorbidities <- as.character(data_piv$Comorbidities) # scale data
This is unregularised partial correlation network Threshold argument - removes the edges that are not significant. If I pass any threshold a lot of edges dissapear
library(qgraph) cor_mat <- cor_auto(data_pi) view(round(cor_mat,2)) graph_pcor <- qgraph(cor_mat, graph = "pcor", layout = "spring", threshold = "bonferroni", sampleSize = nrow(data_pi), alpha = 0.01) # t # threshold Inadditiontoanumericvaluetoomitedgesthisargumentcanalsobeassignedastringto omit insignficant edges. Note that this REMOVES edges from the network (which influences centrality measures and the spring layout). Can be "sig" to compute significance without correction for multiple testing, "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr" or "none" which are used directly in the adjust argument in corr.p of the psych package (Revelle, 2014). In addition, this argument can be assigned "locfdr" in which edges are set to zero if the local FDR is below FDRcutoff. fdrtool from the fdrtool package (Klaus and Strimmer, 2014) is used to compute these measures, which is used inside FDRnetwork.
Estimating a partial correlation network using LASSO regularization and EBIC model selection can be done by setting graph = "glasso". The tuning argument sets the EBIC hyperparameter. Set between 0 (more connections but also more spurious connections) and 0.5 (more parsimony, but also missing more connections):
graph_lasso <- qgraph(cor_mat, graph = "glasso", layout = "spring", tuning = 0.25, sampleSize = nrow(data_pi))
Centrality analysis
centRes <- centrality(graph_lasso) # Node strenght (degree): centRes$OutDegree
# Closeness: centRes$Closeness
# Betweeenness: centRes$Betweenness
centralityPlot(graph_lasso)
compare different networks
centralityPlot(GGM = list(unregularized = graph_pcor, regularized = graph_lasso))
to make edges in graphs comparable in qgraph, the cut, minimum and maximum arguments need to be set to the same values. details = TRUE
qgraph(cor_mat, graph = "glasso", layout = "spring", tuning = 0.25, sampleSize = nrow(data_pi), minimum = 0, cut = 0.01, maximum = 1, details = TRUE, esize = 20) # the more I increase the cut the less edges are present - I have tried with 0.15 , 0.10
Comparable layouts
Layout <- averageLayout(graph_pcor,graph_lasso) layout(t(1:2)) qgraph(cor_mat, graph = "pcor", layout = Layout,threshold = "bonferroni", sampleSize = nrow(data_pi), minimum = 0, cut = 0.15, maximum = 1, details = TRUE, esize = 20, title = "Partial correlations") # qgraph(cor_mat, graph = "glasso", layout = Layout, tuning = 0.25, sampleSize = nrow(data_pi), minimum = 0, cut = 0.15, maximum = 1, details = TRUE, esize = 20, title = "LASSO regularization")
qgraph(cor_mat, graph = "glasso", layout = "spring", tuning = 0.25, sampleSize = nrow(data_pi), legend.cex = 0.2, vsize = 5, esize = 15, pastel = TRUE, posCol = "#003399", negCol = "#FF9933", borders = FALSE, vTrans = 200, details = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.