source('test/main_pkgs.R')
library(Ipaper)
library(ropls)
# Dongdong Kong, 2019-08-15
# -------------------------
# 1. Previous数据无用,无法在ALL中匹配上
# 2. Current与ALL完全匹配
headcols <- . %>% .[, 1:10]
df_exam <- read.xlsx("INPUT/d_检查指标.xlsx") %>% data.table()
lst <- read_xlsx2list("INPUT/中大附一GDM复发-20190810.xlsx") %>% map(data.table)
# d_part <- df_all[which(ill_diabete == "妊娠期并发糖尿病"), ]
# df2 <- read.xlsx("INPUT/d_检查指标.xlsx") %>%
# data.table()
# df_all <- lst$ALL
#' two group: 1: diat; 0: non-diat
group_diab <- function(d){
ill_diabete <- d$out_judge %>% str_extract("[\u4e00-\u9fa50-9]*糖尿病")
info_diab <- table(ill_diabete) %>% as.data.table()
ill_diab_sel <- c("妊娠期发生的糖尿病", "妊娠期糖尿病")
d$is_diab <- (ill_diabete %in% ill_diab_sel) %>% as.numeric()
d
}
yinyang <- function(str, to.num = FALSE){
levels <- c("阴性(-)", "弱阳性(+/-)", "阳性(+)", "阳性(1+)", "阳性(2+)", "阳性(3+)", "阳性(4+)")
str %<>% gsub(" ", "", .) %>%
gsub("―", "-", .) %>%
gsub("弱阳\\(\\+/-\\)", "弱阳性(\\+/-)", .)
ans <- factor(str, levels)
if (to.num) ans <- as.numeric(ans) - 1
return(ans)
}
df_current <- lst$current[, .(id_full, age, weight_before, weight, height, out_judge_diabetes)] %>% group_diab
{
df <- merge(df_exam, df_current, c("id_full"))
df[, `:=`(y = as.factor(is_diab),
bmi = weight/(height/100)^2,
bmi_before = weight_before/(height/100)^2)]
df <- df[, -(1:12)] %>% reorder_name(c("is_diab", "bmi", "bmi_before"))
names <- c("A1C", "CHOL", "TG", "LDL", "HDL", "urine.sugar", "urine.protein", "GLU")
d1 <- df[, .(`血红蛋白A1c(HBA1c).x`,
`总胆固醇CHOL.x`, `甘油三酯TG.x`, `高密度胆固醇HDL-c.x`, `低密度胆固醇LDL-c.x`,
`尿糖(干化学)`, `尿蛋白(干化学)`, `葡萄糖GLU` = as.numeric(`葡萄糖GLU`))] %>%
set_colnames(names)
d <- df[, .(y, age, bmi, bmi_before)] %>% cbind(d1)
# d[, 5:7] %<>% map(table)
d$urine.sugar %<>% yinyang(to.num = TRUE)
d$urine.protein %<>% yinyang(to.num = TRUE)
# d <- df[, .(y = as.factor(`is_diab`), bmi, bmi_before,
# `OGTT空腹血糖.x`, `OGTT血糖.1.h.x`, `OGTT血糖.2.h.x`)]
d
}
imageF(d[, -1], main = 'Missing values in INPUT')
# d[, y2 := as.numeric(`OGTT空腹血糖.x` >= 5.1 | `OGTT血糖.1.h.x` >= 10 | `OGTT血糖.2.h.x` >= 8.5)]
## 1. method1-OPLS
m <- opls(d[, -1], d[, y], predI = 2, orthoI = 1)
tbl <- table(fit = fitted(m), ref = d[, y])
# precision(tbl)
recall(tbl, relevant = "1")
# recall: 预测正确的样本数
# precision:
## 2. method2-PCA
m <- opls(d[, -1], plotL = FALSE)
plot(m, parAsColFcVn = d$y)
# tbl <- table(fit = fitted(m), ref = d[, y])
# # precision(tbl)
# recall(tbl, relevant = "1")
##
# info <- map(lst[2:3], group_diab) %>% map("is_diab") %>% as.data.table()
# tbl <- table(info)
# pvalue < 0.05, 则x-y相互非独立
r <- chisq.test(tbl) # "spearman
# t.test 均值检测
# aov: 方差
## 均值, 方差检验--------------------------------------------------------------
summary_fun <- function(x){
mean <- mean(x, na.rm = TRUE)
sd <- sd(x, na.rm = TRUE)
sprintf("%.1f ± %.1f", mean, sd)
}
varnames <- colnames(d)[-1] %>% set_names(., .)
info <- d[, lapply(.SD, summary_fun), .(y),.SDcols = varnames]
grp <- d$y
pvals <-foreach(var = varnames) %do% {
x <- d[[var]]
x0 <- x[grp == 0] # non-diat
x1 <- x[grp == 1] # non-diat
r <- t.test(x0, x1)
r$p.value
} %>% unlist()
vars_sig <- varnames[pvals < 0.05]
## 1. --------------------------------------------------------------------------
m <- opls(d[, -1], d[, y], predI = 2, orthoI = 1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.