knitr::opts_chunk$set( echo = FALSE, message = TRUE, warning = TRUE )
# Data handling library(tidyverse) library(lubridate) # Data generation library(faux) # power Law library(poweRlaw) # Visualization library(patchwork) library(cowplot) library(scales)
## Define binarization function binarize_data <- function(data, cut_off) { data_bin <- data for (i in 1:(ncol(data))){ orig <- paste("v", i, sep = "") bin <- paste("V", i, sep = "") data_bin[orig] <- dplyr::case_when(data_bin[bin] <= cut_off ~ 0, data_bin[bin] > cut_off ~ 1) } return(data_bin) }
# General code # Get Frequency and total sums data_profiles <- list() for (i in 1:length(data_list)) { data_profiles[[i]] <- data_list[[i]] %>% dplyr::select((data_gen_grid[[i,1]]+1):ncol(data_list[[i]]) ) data_profiles[[i]] <- plyr::count(data_profiles[[i]]) data_profiles[[i]] <- data_profiles[[i]] %>% mutate(total_bin = rowSums(data_profiles[[i]])-freq) }
# original code # Create new data frame data2 <- data1_binarized %>% select(total:ncol(data1_binarized)) %>% select(-total) %>% tibble() ## Count frequency of profiles data2_counted <- plyr::count(data2[, ]) # Create sum score of endorsed symptoms data2_counted <- data2_counted %>% mutate(total_bin = rowSums(data2_counted)-freq) ###### 4.2 Create datax ######################################################## # Create full dataset datax <- dplyr::left_join(data1_binarized, data2_counted)
assess_power_law <- function(data) { TEMP_List <- list() TEMP_List[[1]] <- matrix(nrow = 1, ncol = 5) TEMP_List[[2]] <- list() TEMP_List[[3]] <- list() #### Prepare Distribution <- data$freq ### Power Law m_pl = displ$new(Distribution) est_pl = estimate_xmin(m_pl) m_pl$setXmin(est_pl) # Estimated Parameters TEMP_List[[1]][1,1] <- m_pl$xmin TEMP_List[[1]][1,2] <- m_pl$pars ## Log normal with Xmin of PL m_ln_EQ = dislnorm$new(Distribution) m_ln_EQ$setXmin(m_pl$getXmin()) est_m_ln_EQ = estimate_pars(m_ln_EQ) m_ln_EQ$setPars(est_m_ln_EQ) # Formally assess TEMP_List[[1]][1,3] <- compare_distributions(m_pl, m_ln_EQ)$p_two_sided # p < 0.05 -> none of the two has better fit TEMP_List[[1]][1,4] <- compare_distributions(m_pl, m_ln_EQ)$p_one_sided # p < 0.05 -> m_ln_EQ better fit TEMP_List[[1]][1,5] <- compare_distributions(m_ln_EQ, m_pl)$p_one_sided # p < 0.05 -> m_pl better fi TEMP_List[[2]] <- m_pl TEMP_List[[3]] <- m_ln_EQ return(TEMP_List) } RESULTS <- matrix(ncol = 5, nrow = length(data_profiles)) TEMP_Res <- list() VISUALIZE_pl <- list() VISUALIZE_pl_l <- list() VISUALIZE_ln_l <- list() colnames(RESULTS) <- c("Xmin", "alpha", "p - two side", "Favoring PL", "Favoring LN") for (i in 1:length(data_profiles)) { TEMP_Res <- assess_power_law(data = data_profiles[[i]]) RESULTS[i,1:5] <- TEMP_Res[[1]] VISUALIZE_pl[[i]] <- plot(TEMP_Res[[2]]) VISUALIZE_pl_l[[i]] <- lines(TEMP_Res[[2]]) VISUALIZE_ln_l[[i]] <- lines(TEMP_Res[[3]]) }
RESULTS %>% #as_tibble() %>% #arrange(Features, Sample_Size) %>% round(digits = 3) #%>% #print(n=32)
# Plot for (i in 1:length(data_profiles)) { print(ggplot(VISUALIZE_pl[[i]], aes(x=x,y=y)) + geom_point(size = 1) + scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)), expand = c(0, 0), limits = c(10^-5, 1)) + scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x)), expand = c(0, 0), limits = c(1, 10^5)) + geom_line(data = VISUALIZE_pl_l[[i]], aes(x=x, y=y), color = "red", size = 0.4) + geom_line(data = VISUALIZE_ln_l[[i]], aes(x=x, y=y), color = "blue", size = 0.4,linetype = "dashed")+ xlab("") + ylab("") + ggtitle("") + theme_minimal_grid() + theme( plot.title = element_text(size=11), axis.title.x = element_text(size=9, margin = margin(t = 0, r = 0, b = 0, l = 0)), axis.title.y = element_text(size=9, margin = margin(t = 0, r = 0, b = 0, l = 0)), axis.text.x = element_text(size=9, color = "black"), axis.text.y = element_text(size=9, color = "black", margin = margin(t = 0, r = 0, b = 0, l = 5)), axis.ticks = element_blank(), panel.grid.major.x = element_line(size=.2, color="black"), panel.grid.major.y = element_line(size=.2, color="black"), panel.grid.minor.y = element_blank(), panel.border = element_rect(colour = "black", fill=NA, size=1))) }
save.image(file = "Sim_Pheno.RData")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.