library(knitr) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(stpvers) library(tidyverse) hyper$chol0[c(1,5)]<- NA hyper$rrs0[c(2,5)]<- NA hyper$rrd0[c(3,5)]<- NA
Umformen von einem Breit-Format nach einem Lang-Format. Melt2 und melt2 sind Erweiterungen der reshape2::melt Funktion. Intern wird melt und dcast verwendet.
df <- data.frame(month=rep(1:3,2), student=rep(c("Amy", "Bob"), each=3), A=c(9, 7, 6, 8, 6, 9), B=c(6, 7, 8, 5, 6, 7)) mean3 <- function(x) round(mean(x), 1) df
tidyr::spread(df[-4], student, A) (df_w1 <- Wide(df[-4], student, A))
tidyr::gather(df_w1, key = "student", value = "A", Amy, Bob) Long(Amy + Bob ~ month, df_w1, key="student", value="A") Long(df_w1, id.vars=1, key = "student", value = "A")
(df_w2 <- Wide(df, student, c("A", "B"))) Long(list(A=c("Amy_A", "Bob_A" ), B=c("Amy_B", "Bob_B")), df_w2, by = ~ month, key = "student", key.levels= c("Amy", "Bob"))
Summarise(A + B ~ student, df, fun=mean3, key = "group", value = "cbc") Summarise(A + B ~ student, df, fun=mean3, margins = TRUE) Summarise(A + B ~ student, df, fun=mean3, formula = variable ~ student, margins = TRUE)
Die Funktion separate_multiple_choice() transformiert einen String mit Trennzeichen zu einem Multi-Set mit 0 und 1. (Separate multiple choice) der param x ist entweder ein Character oder eine zahl
dat <- data.frame( Q1 = c(134, NA, 35, 134, 5, 24), Q2 = c( "Alm Dudler, Essig, Cola", NA, "Cola, Holer", "Alm Dudler, Cola, Essig","Holer", "Bier, Essig" ) ) dat cbind(dat[-1], separate_multiple_choice(dat$Q2)) dat <- cbind(dat[-2], separate_multiple_choice(dat$Q1, label = c( "Alm Dudler", "Bier", "Cola", "Essig", "Holer" ))) names(dat) <- GetLabelOrName(dat) dat
#Melt2 hyper_smal <- hyper[, c("g","chol0","chol1","chol6","chol12")] data_long <- Long(hyper_smal, id.vars=1, key = "time", value = "chol") head(data_long)
Aber ohne die Labels.
data_long <- hyper %>% tidyr::gather("time", "chol", chol0:chol12) %>% dplyr::select(g, time, chol) head(data_long)
head(x<-Long(chol0+chol1+chol6+chol12~g , hyper))
head( x<- hyper %>% Long(chol0,chol1,chol6,chol12, by=~g))
hyper2 <- hyper[, c("chol0", "chol1", "chol6", "chol12")] head(hyper_long <- Long(~chol0+chol1+chol6+chol12, hyper2, key = "time", value = "chol")) head(hyper_long <- Long(hyper2, key = "time", value = "chol"))
Bei listen ist die Reihenfolge entscheidend also "chol0", "chol1", "chol6", "chol12" gibt ein anderes Ergebniss als "chol0", "chol12", "chol1", "chol6"
hyper_long <- Long( list( chol = c("chol0", "chol1", "chol6", "chol12"), rrs = c("rrs0", "rrs1" , "rrs6" , "rrs12"), rrd = c("rrd0" , "rrd1", "rrd6", "rrd12") ), hyper, by = ~ nr+g, key = "Zeit", key.levels = c("Ausgangswert", "1 Monat", "6 Monate", "12 Monate") ) rbind( hyper_long[1:5,], dplyr::sample_n(hyper_long,5 ))
hyper_long %>% Wide(Zeit, c("chol", "rrs", "rrd"))
Mischung usd der plyr::llply und data.frame-Funktion. Wenn keine Funktion uebergeben wird wird der Datensatz zu Zahlen transformiert.
funy <- function(x) cut(x, 3, c("low", "med", "hig")) res1 <- Dapply( ~ a + gr + gew + chol0 + chol1 + chol6, hyper, fun = funy) res2 <- Dapply(hyper, a, gr, gew, chol0, chol1, chol6, fun = funy) vrs<- c("nr", "med", "a", "gr", "gew", "chol0", "chol1", "chol6") rbind(res1[1:5, vrs], res2[1:5, vrs])
funny<- function(x) round(mean(x), 1) hyper_long <- Long(hyper, chol0, chol1, chol6, chol12, by=~g) aggregate( value~variable, hyper_long, funny) %>% Output(caption="aggregate") hyper_long<-Long(chol0+chol1+chol6+chol12~g, hyper, "Zeit", "Cholesterin") #-- Spread + aggragate aggregate(Cholesterin~Zeit+g, hyper_long, funny) %>% spread(g, Cholesterin) %>% Output(caption="aggregate mit spread")
das gleiche wie aggragate
hyper_long %>% group_by(Zeit, g) %>% summarise(Cholesterin=funny(Cholesterin)) %>% spread(g, Cholesterin) %>% Output(caption="dplyr mit spread")
Summarise Recast2 das gleiche wie oben
Summarise(chol0+chol1+chol6+chol12~g, hyper, fun=funny, formula=variable~g) %>% Output(caption="Summarise mit formula")
das gleiche wie oben nur mit spread
Summarise( chol0 + chol1 + chol6 + chol12 ~ g, hyper, fun = funny, key = "Zeit", value = "Cholesterin" ) %>% spread(g, Cholesterin) %>% Output(caption = "Summarise mit spread")
und noch die margins
Summarise( chol0 + chol1 + chol6 + chol12 ~ g, hyper, fun = funny, formula = variable ~ g, margins = TRUE ) %>% Output(caption = "Summarise mit margins")
Summarise( chol0 + chol1 + chol6 + chol12 ~ g, hyper, key = "Zeit", fun = function(x) c( N = length(na.omit(x)), mean = round(mean(x, na.rm = TRUE), 2), sd = round(sd(x, na.rm = TRUE), 2) ), ) %>% Wide( g, c("N", "mean", "sd")) %>% Output(caption = "Summarise mit eigener fun")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.