aggregate 2 dplyr
# Toy data set: ten rows holding gender, a group number (num) and the two
# measurements h and w.
dat <- data.frame(
  gender = rep(c("M", "F"), each = 5),
  num = rep_len(c(1, 2, 3), 10),
  h = c(170, 180, 190, 180, 170, 150, 160, 170, 160, 150),
  w = c(80, 70, 100, 80, 60, 50, 50, 60, 60, 50)
)
# BMI is w divided by the square of h / 100 (presumably kg and cm converted
# to m; the units are not stated in the data itself).
dat[["BMI"]] <- dat[["w"]] / (dat[["h"]] / 100)^2
# Count how many rows fall into each gender x num cell.
table(dat$gender, dat$num)
## ## 1 2 3 ## F 2 1 2 ## M 2 2 1
책의 113쪽을 보자.
여기서 aggregate나 by와 같은 함수를 자세히 설명해 놓지 않았다.
왜냐하면 이 함수들은 dplyr 또는 data.table 패키지를 사용해서도 동일한 결과를 산출할 수 있기 때문이다. 그리고 요즘에는 잘 사용하지 않는 함수들이다. 흔히 말하는 legacy code라고 말할 수 있다.
하지만 그 의미를 이해할 필요도 있으므로 dplyr로 번역을 해보았다.
# Base R: mean of h for every gender (rows) x num (columns) cell.
tapply(dat$h, list(dat$gender, dat$num), mean)
# library() stops with an error when a package is missing, whereas require()
# only warns and returns FALSE, letting the script fail later on.
library(dplyr)
library(tidyr)
# dplyr/tidyr equivalent: compute the per-group mean in long form, then spread
# the num values into columns. pivot_wider() replaces the superseded spread().
dat %>%
  group_by(gender, num) %>%
  summarise(mean = mean(h)) %>%
  pivot_wider(names_from = num, values_from = mean)
## `summarise()` regrouping output by 'gender' (override with `.groups` argument)
## 1 2 3 ## F 155 170 155 ## M 175 175 190 ## # A tibble: 2 x 4 ## # Groups: gender [2] ## gender `1` `2` `3` ## <chr> <dbl> <dbl> <dbl> ## 1 F 155 170 155 ## 2 M 175 175 190
# Base R: total of h for every gender x num combination.
aggregate(h ~ gender + num, data = dat, FUN = sum)
# dplyr equivalent; arrange(num) orders the rows by num.
dat %>%
  group_by(gender, num) %>%
  summarise(h = sum(h)) %>%
  arrange(num)
## `summarise()` regrouping output by 'gender' (override with `.groups` argument)
## gender num h ## 1 F 1 310 ## 2 M 1 350 ## 3 F 2 170 ## 4 M 2 350 ## 5 F 3 310 ## 6 M 3 190 ## # A tibble: 6 x 3 ## # Groups: gender [2] ## gender num h ## <chr> <dbl> <dbl> ## 1 F 1 310 ## 2 M 1 350 ## 3 F 2 170 ## 4 M 2 350 ## 5 F 3 310 ## 6 M 3 190
# Base R: the left-hand side h + w is aggregated with sum for every
# gender x num combination.
aggregate(h + w ~ gender + num, data = dat, FUN = sum)
# dplyr equivalent. Note that the result column is named h here even though it
# holds the sum of h + w.
dat %>%
  group_by(gender, num) %>%
  summarise(h = sum(h + w)) %>%
  arrange(num)
## `summarise()` regrouping output by 'gender' (override with `.groups` argument)
## gender num h + w ## 1 F 1 410 ## 2 M 1 510 ## 3 F 2 230 ## 4 M 2 480 ## 5 F 3 420 ## 6 M 3 290 ## # A tibble: 6 x 3 ## # Groups: gender [2] ## gender num h ## <chr> <dbl> <dbl> ## 1 F 1 410 ## 2 M 1 510 ## 3 F 2 230 ## 4 M 2 480 ## 5 F 3 420 ## 6 M 3 290
위의 경우 h=sum(h+w)인지 h=sum(h)+sum(w)인지 헷갈릴 수 있다. 합(sum)은 두 식의 결과가 항상 같아서 구분할 수 없지만, h=max(h+w)와 h=max(h)+max(w)는 다르기 때문에 다음에서 max로 확인하자.
# Base R: maximum of h + w for every gender x num combination.
aggregate(h + w ~ gender + num, data = dat, FUN = max)
# dplyr equivalent: max() is applied to the row-wise sum h + w, and the result
# column is given the same name, `h + w`.
dat %>%
  group_by(gender, num) %>%
  summarise(`h + w` = max(h + w)) %>%
  arrange(num)
## `summarise()` regrouping output by 'gender' (override with `.groups` argument)
## gender num h + w ## 1 F 1 210 ## 2 M 1 260 ## 3 F 2 230 ## 4 M 2 250 ## 5 F 3 220 ## 6 M 3 290 ## # A tibble: 6 x 3 ## # Groups: gender [2] ## gender num `h + w` ## <chr> <dbl> <dbl> ## 1 F 1 210 ## 2 M 1 260 ## 3 F 2 230 ## 4 M 2 250 ## 5 F 3 220 ## 6 M 3 290
# Base R: cbind(h, w) on the left-hand side sums several columns at once.
aggregate(cbind(h, w) ~ gender + num, data = dat, FUN = sum)
# dplyr equivalent: one summary expression per column.
dat %>%
  group_by(gender, num) %>%
  summarise(
    h = sum(h),
    w = sum(w)
  ) %>%
  arrange(num)
## `summarise()` regrouping output by 'gender' (override with `.groups` argument)
## gender num h w ## 1 F 1 310 100 ## 2 M 1 350 160 ## 3 F 2 170 60 ## 4 M 2 350 130 ## 5 F 3 310 110 ## 6 M 3 190 100 ## # A tibble: 6 x 4 ## # Groups: gender [2] ## gender num h w ## <chr> <dbl> <dbl> <dbl> ## 1 F 1 310 100 ## 2 M 1 350 160 ## 3 F 2 170 60 ## 4 M 2 350 130 ## 5 F 3 310 110 ## 6 M 3 190 100
# Base R: "." on the left-hand side stands for every column not used on the
# right-hand side (here h, w and BMI); each is summed per gender x num group.
aggregate(. ~ gender + num, data = dat, FUN = sum)
# dplyr equivalent. across(everything(), sum) replaces the superseded
# summarise_all(sum) and is applied to every non-grouping column.
dat %>%
  group_by(gender, num) %>%
  summarise(across(everything(), sum)) %>%
  arrange(num)
## gender num h w BMI ## 1 F 1 310 100 41.75347 ## 2 M 1 350 160 52.37302 ## 3 F 2 170 60 20.76125 ## 4 M 2 350 130 42.36618 ## 5 F 3 310 110 45.65972 ## 6 M 3 190 100 27.70083 ## # A tibble: 6 x 5 ## # Groups: gender [2] ## gender num h w BMI ## <chr> <dbl> <dbl> <dbl> <dbl> ## 1 F 1 310 100 41.8 ## 2 M 1 350 160 52.4 ## 3 F 2 170 60 20.8 ## 4 M 2 350 130 42.4 ## 5 F 3 310 110 45.7 ## 6 M 3 190 100 27.7
# Base R: number of values per gender x num group. With the data frame method
# the grouping keys are returned as Group.1 / Group.2 and length is applied to
# every column of dat, including gender and num.
aggregate(dat, by = list(dat$gender, dat$num), FUN = length)
# dplyr equivalent. across(everything(), length) replaces the superseded
# summarise_all(length); only the non-grouping columns are counted.
dat %>%
  group_by(gender, num) %>%
  summarise(across(everything(), length)) %>%
  arrange(num)
## Group.1 Group.2 gender num h w BMI ## 1 F 1 2 2 2 2 2 ## 2 M 1 2 2 2 2 2 ## 3 F 2 1 1 1 1 1 ## 4 M 2 2 2 2 2 2 ## 5 F 3 2 2 2 2 2 ## 6 M 3 1 1 1 1 1 ## # A tibble: 6 x 5 ## # Groups: gender [2] ## gender num h w BMI ## <chr> <dbl> <int> <int> <int> ## 1 F 1 2 2 2 ## 2 M 1 2 2 2 ## 3 F 2 1 1 1 ## 4 M 2 2 2 2 ## 5 F 3 2 2 2 ## 6 M 3 1 1 1
# Base R: apply summary() to the rows of each gender x num group.
by(dat, list(dat$gender, dat$num), summary)
# Hard to express with dplyr, so data.table is used for the equivalent.
# library() stops with an error when the package is missing, whereas require()
# only warns and returns FALSE.
library(data.table)
datDT <- data.table(dat)
# For every group, print its "gender num" label followed by the summary of the
# group's remaining columns (.SD). The call is made for its printed output.
datDT[, {
  print(paste(gender, num))
  print(summary(.SD))
}, by = c("gender", "num")]
## : F ## : 1 ## gender num h w BMI ## Length:2 Min. :1 Min. :150.0 Min. :50 Min. :19.53 ## Class :character 1st Qu.:1 1st Qu.:152.5 1st Qu.:50 1st Qu.:20.20 ## Mode :character Median :1 Median :155.0 Median :50 Median :20.88 ## Mean :1 Mean :155.0 Mean :50 Mean :20.88 ## 3rd Qu.:1 3rd Qu.:157.5 3rd Qu.:50 3rd Qu.:21.55 ## Max. :1 Max. :160.0 Max. :50 Max. :22.22 ## --------------------------------------------------------------------------------- ## : M ## : 1 ## gender num h w BMI ## Length:2 Min. :1 Min. :170.0 Min. :80 Min. :24.69 ## Class :character 1st Qu.:1 1st Qu.:172.5 1st Qu.:80 1st Qu.:25.44 ## Mode :character Median :1 Median :175.0 Median :80 Median :26.19 ## Mean :1 Mean :175.0 Mean :80 Mean :26.19 ## 3rd Qu.:1 3rd Qu.:177.5 3rd Qu.:80 3rd Qu.:26.93 ## Max. :1 Max. :180.0 Max. :80 Max. :27.68 ## --------------------------------------------------------------------------------- ## : F ## : 2 ## gender num h w BMI ## Length:1 Min. :2 Min. :170 Min. :60 Min. :20.76 ## Class :character 1st Qu.:2 1st Qu.:170 1st Qu.:60 1st Qu.:20.76 ## Mode :character Median :2 Median :170 Median :60 Median :20.76 ## Mean :2 Mean :170 Mean :60 Mean :20.76 ## 3rd Qu.:2 3rd Qu.:170 3rd Qu.:60 3rd Qu.:20.76 ## Max. :2 Max. :170 Max. :60 Max. :20.76 ## --------------------------------------------------------------------------------- ## : M ## : 2 ## gender num h w BMI ## Length:2 Min. :2 Min. :170.0 Min. :60.0 Min. :20.76 ## Class :character 1st Qu.:2 1st Qu.:172.5 1st Qu.:62.5 1st Qu.:20.97 ## Mode :character Median :2 Median :175.0 Median :65.0 Median :21.18 ## Mean :2 Mean :175.0 Mean :65.0 Mean :21.18 ## 3rd Qu.:2 3rd Qu.:177.5 3rd Qu.:67.5 3rd Qu.:21.39 ## Max. :2 Max. :180.0 Max. :70.0 Max. :21.60 ## --------------------------------------------------------------------------------- ## : F ## : 3 ## gender num h w BMI ## Length:2 Min. :3 Min. :150.0 Min. :50.0 Min. 
:22.22 ## Class :character 1st Qu.:3 1st Qu.:152.5 1st Qu.:52.5 1st Qu.:22.53 ## Mode :character Median :3 Median :155.0 Median :55.0 Median :22.83 ## Mean :3 Mean :155.0 Mean :55.0 Mean :22.83 ## 3rd Qu.:3 3rd Qu.:157.5 3rd Qu.:57.5 3rd Qu.:23.13 ## Max. :3 Max. :160.0 Max. :60.0 Max. :23.44 ## --------------------------------------------------------------------------------- ## : M ## : 3 ## gender num h w BMI ## Length:1 Min. :3 Min. :190 Min. :100 Min. :27.7 ## Class :character 1st Qu.:3 1st Qu.:190 1st Qu.:100 1st Qu.:27.7 ## Mode :character Median :3 Median :190 Median :100 Median :27.7 ## Mean :3 Mean :190 Mean :100 Mean :27.7 ## 3rd Qu.:3 3rd Qu.:190 3rd Qu.:100 3rd Qu.:27.7 ## Max. :3 Max. :190 Max. :100 Max. :27.7 ## [1] "M 1" ## h w BMI ## Min. :170.0 Min. :80 Min. :24.69 ## 1st Qu.:172.5 1st Qu.:80 1st Qu.:25.44 ## Median :175.0 Median :80 Median :26.19 ## Mean :175.0 Mean :80 Mean :26.19 ## 3rd Qu.:177.5 3rd Qu.:80 3rd Qu.:26.93 ## Max. :180.0 Max. :80 Max. :27.68 ## [1] "M 2" ## h w BMI ## Min. :170.0 Min. :60.0 Min. :20.76 ## 1st Qu.:172.5 1st Qu.:62.5 1st Qu.:20.97 ## Median :175.0 Median :65.0 Median :21.18 ## Mean :175.0 Mean :65.0 Mean :21.18 ## 3rd Qu.:177.5 3rd Qu.:67.5 3rd Qu.:21.39 ## Max. :180.0 Max. :70.0 Max. :21.60 ## [1] "M 3" ## h w BMI ## Min. :190 Min. :100 Min. :27.7 ## 1st Qu.:190 1st Qu.:100 1st Qu.:27.7 ## Median :190 Median :100 Median :27.7 ## Mean :190 Mean :100 Mean :27.7 ## 3rd Qu.:190 3rd Qu.:100 3rd Qu.:27.7 ## Max. :190 Max. :100 Max. :27.7 ## [1] "F 3" ## h w BMI ## Min. :150.0 Min. :50.0 Min. :22.22 ## 1st Qu.:152.5 1st Qu.:52.5 1st Qu.:22.53 ## Median :155.0 Median :55.0 Median :22.83 ## Mean :155.0 Mean :55.0 Mean :22.83 ## 3rd Qu.:157.5 3rd Qu.:57.5 3rd Qu.:23.13 ## Max. :160.0 Max. :60.0 Max. :23.44 ## [1] "F 1" ## h w BMI ## Min. :150.0 Min. :50 Min. 
:19.53 ## 1st Qu.:152.5 1st Qu.:50 1st Qu.:20.20 ## Median :155.0 Median :50 Median :20.88 ## Mean :155.0 Mean :50 Mean :20.88 ## 3rd Qu.:157.5 3rd Qu.:50 3rd Qu.:21.55 ## Max. :160.0 Max. :50 Max. :22.22 ## [1] "F 2" ## h w BMI ## Min. :170 Min. :60 Min. :20.76 ## 1st Qu.:170 1st Qu.:60 1st Qu.:20.76 ## Median :170 Median :60 Median :20.76 ## Mean :170 Mean :60 Mean :20.76 ## 3rd Qu.:170 3rd Qu.:60 3rd Qu.:20.76 ## Max. :170 Max. :60 Max. :20.76 ## Empty data.table (0 rows and 2 cols): gender,num
Leave a comment