knitr::opts_chunk$set(
    echo = FALSE,
    message = TRUE,
    warning = TRUE
)
# Data handling
library(tidyverse)
library(lubridate)

# Data generation
library(faux)

# power Law
library(poweRlaw)

# Visualization
library(patchwork)
library(cowplot)
library(scales)

Function 1

## Define binarization function 
binarize_data <- 
  function(data, cut_off) {
    data_bin <- data
    for (i in 1:(ncol(data))){
      orig <- paste("v", i, sep = "")
      bin <- paste("V", i, sep = "")
      data_bin[orig] <- dplyr::case_when(data_bin[bin] <= cut_off ~ 0, 
                                         data_bin[bin] > cut_off ~ 1) 
    }
  return(data_bin)
  }

Function 2

# General code
# Get Frequency and total sums
data_profiles <- list()
for (i in 1:length(data_list)) 
{
data_profiles[[i]] <- data_list[[i]] %>% 
  dplyr::select((data_gen_grid[[i,1]]+1):ncol(data_list[[i]]) )
data_profiles[[i]]  <- plyr::count(data_profiles[[i]])
data_profiles[[i]] <- data_profiles[[i]] %>% 
  mutate(total_bin = rowSums(data_profiles[[i]])-freq)
}

Function 2 - original code

# original code
# Create new data frame
data2 <- data1_binarized %>% 
  select(total:ncol(data1_binarized)) %>% 
  select(-total) %>% 
  tibble()

## Count frequency of profiles
data2_counted <- plyr::count(data2[, ])

# Create sum score of endorsed symptoms
data2_counted <- data2_counted %>% 
  mutate(total_bin = rowSums(data2_counted)-freq)


######  4.2 Create datax ########################################################
# Create full dataset
datax <- dplyr::left_join(data1_binarized, data2_counted)

Function 3 & 5

assess_power_law <- 
  function(data) {
  TEMP_List <- list()
  TEMP_List[[1]] <- matrix(nrow = 1, ncol = 5)
  TEMP_List[[2]] <- list()
  TEMP_List[[3]] <- list()
#### Prepare
Distribution <- data$freq

### Power Law
m_pl = displ$new(Distribution)
est_pl = estimate_xmin(m_pl)
m_pl$setXmin(est_pl)

# Estimated Parameters
TEMP_List[[1]][1,1] <- m_pl$xmin
TEMP_List[[1]][1,2] <- m_pl$pars

## Log normal with Xmin of PL
m_ln_EQ = dislnorm$new(Distribution) 
m_ln_EQ$setXmin(m_pl$getXmin())
est_m_ln_EQ = estimate_pars(m_ln_EQ)
m_ln_EQ$setPars(est_m_ln_EQ)

# Formally assess
TEMP_List[[1]][1,3] <- compare_distributions(m_pl, m_ln_EQ)$p_two_sided #   p < 0.05  -> none of the two has better fit
TEMP_List[[1]][1,4] <- compare_distributions(m_pl, m_ln_EQ)$p_one_sided #   p < 0.05 -> m_ln_EQ  better fit
TEMP_List[[1]][1,5] <- compare_distributions(m_ln_EQ, m_pl)$p_one_sided #   p < 0.05 -> m_pl better fi

TEMP_List[[2]] <- m_pl
TEMP_List[[3]] <- m_ln_EQ

return(TEMP_List)
  }


RESULTS <- matrix(ncol = 5, nrow = length(data_profiles)) 
TEMP_Res <- list()
VISUALIZE_pl <- list()
VISUALIZE_pl_l <- list()
VISUALIZE_ln_l <- list()
colnames(RESULTS) <- c("Xmin", "alpha", "p - two side", "Favoring PL", "Favoring LN")

for (i in 1:length(data_profiles)) {
  TEMP_Res <- assess_power_law(data = data_profiles[[i]])
  RESULTS[i,1:5] <- TEMP_Res[[1]]
  VISUALIZE_pl[[i]] <- plot(TEMP_Res[[2]])
  VISUALIZE_pl_l[[i]] <- lines(TEMP_Res[[2]])
  VISUALIZE_ln_l[[i]] <- lines(TEMP_Res[[3]])
}

Part of Function 5: p-values

RESULTS %>% 
  #as_tibble() %>% 
  #arrange(Features, Sample_Size) %>% 
  round(digits = 3) #%>% 
  #print(n=32)

Part of Function 5: viz

# Plot
for (i in 1:length(data_profiles)) {
 print(ggplot(VISUALIZE_pl[[i]], aes(x=x,y=y)) + geom_point(size = 1) +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", math_format(10^.x)),
                expand = c(0, 0),
                limits = c(10^-5, 1)) + 
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", math_format(10^.x)),
                expand = c(0, 0),
                limits = c(1, 10^5)) + 
  geom_line(data = VISUALIZE_pl_l[[i]], aes(x=x, y=y), color = "red", size = 0.4) + 
  geom_line(data = VISUALIZE_ln_l[[i]], aes(x=x, y=y), color = "blue", size = 0.4,linetype = "dashed")+ 
  xlab("") + 
  ylab("") +
  ggtitle("") +
  theme_minimal_grid() +
  theme(
  plot.title = element_text(size=11),
  axis.title.x = element_text(size=9, margin = margin(t = 0, r = 0, b = 0, l = 0)),
  axis.title.y = element_text(size=9, margin = margin(t = 0, r = 0, b = 0, l = 0)),
  axis.text.x = element_text(size=9, color = "black"),
  axis.text.y = element_text(size=9, color = "black", margin = margin(t = 0, r = 0, b = 0, l = 5)),
  axis.ticks = element_blank(),
  panel.grid.major.x = element_line(size=.2, color="black"), 
  panel.grid.major.y = element_line(size=.2, color="black"), 
  panel.grid.minor.y = element_blank(), 
  panel.border = element_rect(colour = "black", fill=NA, size=1)))
}
save.image(file = "Sim_Pheno.RData")


orduek/PsychPower documentation built on Oct. 25, 2023, 7:36 a.m.