{"id":2156,"date":"2020-04-07T22:08:08","date_gmt":"2020-04-07T13:08:08","guid":{"rendered":"http:\/\/141.164.34.82\/?p=2156"},"modified":"2022-02-03T17:15:22","modified_gmt":"2022-02-03T08:15:22","slug":"%ea%b5%90%ec%b0%a8-%ea%b2%80%ec%a6%9d-%eb%8d%b0%ec%9d%b4%ed%84%b0-%ea%b5%ac%ec%84%b1%ed%95%98%ea%b8%b0%ec%99%80-caretcreatefolds","status":"publish","type":"post","link":"http:\/\/ds.sumeun.org\/?p=2156","title":{"rendered":"\uad50\ucc28 \uac80\uc99d \ub370\uc774\ud130 \uad6c\uc131\ud558\uae30\uc640 &#8220;caret::createFolds&#8221;"},"content":{"rendered":"<h2>k-\uacb9 \uad50\ucc28 \uac80\uc99d(k-fold cross validation)<\/h2>\n<p>\uad50\ucc28 \uac80\uc99d\uc740 \ubaa8\ud615\uc758 \uc131\ub2a5\uc744 \ud310\ub2e8\ud558\uae30 \uc704\ud574 \uc0ac\uc6a9\ud55c\ub2e4. \uc120\ud615 \ubaa8\ud615\uc5d0\uc11c \\(R^2\\) (\ub610\ub294 \\(adj-R^2\\))\ub098 AIC, BIC \ub4f1\uc740 \ubaa8\ub450 \ud2b9\uc815\ud55c \uac00\uc815\uc774 \uc131\ub9bd\ud560 \ub54c \uc77c\ubc18\ud654 \uc131\ub2a5\uc744 \ud310\ub2e8\ud558\uae30 \uc704\ud574 \uc0ac\uc6a9\ud560 \uc218 \uc788\uc9c0\ub9cc, \ubaa8\ud615\uc774 \ubcf5\uc7a1\ud558\uac70\ub098, \uc624\ucc28\ud56d\uc774 \uc815\uaddc\ubd84\ud3ec\ub97c \ub760\uc9c0 \uc54a\uac70\ub098 \ud558\ub294 \uc880 \ub354 \uc77c\ubc18\uc801\uc778 \uc0c1\ud669\uc5d0\uc11c\ub294 \ubaa8\ud615\uc758 \uc77c\ubc18\ud654 \uc131\ub2a5\uc744 \uc815\ud655\ud558\uac8c \ubc18\uc601\ud55c\ub2e4\uace0 \ub9d0\ud558\uae30 \ud798\ub4e4\uae30 \ub54c\ubb38\uc774\ub2e4.<\/p>\n<p>\uba3c\uc800 \ub2e4\uc74c\uacfc \uac19\uc740 \ub370\uc774\ud130\uac00 \uc788\ub2e4\uace0 \ud558\uc790.<\/p>\n<pre><code class=\"r\">N = 100\r\nx1 &lt;- rexp(N)\r\nx2 &lt;- rexp(N, 0.5)\r\ne &lt;- rpois(N, 1)\r\ny &lt;- x1 + 2*log(x2+1) + e\r\ndat &lt;- data.frame(x1, x2, y)\r\nhead(dat)\r\n<\/code><\/pre>\n<pre>##           x1         x2        y\r\n## 1 1.35960161 1.09324970 3.837037\r\n## 2 2.13923671 0.03644536 2.210831\r\n## 3 1.81704509 1.42709966 3.590439\r\n## 4 1.27645172 3.51469184 5.291126\r\n## 5 
0.08845231 0.94381118 2.417753\r\n## 6 2.57794238 0.71397535 3.655573\r\n<\/pre>\n<p>5-\uacb9 \uad50\ucc28 \uac80\uc99d \ub370\uc774\ud130\ub97c \uad6c\uc131\ud558\ub294 \ubc29\ubc95\uc744 \uc0dd\uac01\ud574\ubcf4\uc790. \ub2e4\uc74c\uacfc \uac19\ub2e4.<\/p>\n<pre><code class=\"r\">isamp &lt;- sample(100)\r\nfolds &lt;- list(fold1 = isamp[1:20], \r\n              fold2 = isamp[21:40], \r\n              fold3 = isamp[41:60], \r\n              fold4 = isamp[61:80], \r\n              fold5 = isamp[81:100])\r\nfolds\r\n<\/code><\/pre>\n<pre>## $fold1\r\n##  [1]  6 92 28 80 54 66 55 11 69 32 17 75 87 84 45 25 68 21 86 85\r\n## \r\n## $fold2\r\n##  [1] 31 33 94 99 29 27 88 89 41 15 64 42 18 72 12 50 26  3 37 44\r\n## \r\n## $fold3\r\n##  [1]  39  76  81  14  48  38  97 100  46  63  90  43   5  49  52  51  56  59  78  23\r\n## \r\n## $fold4\r\n##  [1] 24 93 22 20 67  8 53 61 74 10  1 96 62  9 79 70 65 34  2 40\r\n## \r\n## $fold5\r\n##  [1] 73 77 60 47 91 58 71  4 36 57 19 35 95 82 83 98 13  7 16 30\r\n<\/pre>\n<p>\ud568\uc218 <code>sample()<\/code>\uc740 <code>1<\/code> \ubd80\ud130 <code>100<\/code>\uc758 \uc790\uc5f0\uc218\ub97c \uc801\uc808\ud788 \uc11e\uc5b4\uc11c \uae38\uc774 100\uc758 \uc790\uc5f0\uc218 \ubca1\ud130\ub97c \ubc49\uc5b4\ub0b8\ub2e4. 
\uadf8\ub9ac\uace0 \uc774 \uacb0\uacfc\ub294 <code>isamp<\/code>\uc5d0 \uc800\uc7a5\ub418\ubbc0\ub85c, 100\uac1c\ub97c 20\uac1c\uc529 \ucabc\uac1c\uc11c validation set\uc73c\ub85c \ub9cc\ub4e4\uba74 \ub41c\ub2e4.<\/p>\n<p>(\uc6b0\ub9ac\ub294 \uac00\uc7a5 \uad6c\uccb4\uc801\uc778 100\uac1c\uc758 \uc790\ub8cc, \uad50\ucc28 \uac80\uc99d \ud69f\uc218 5\uc5d0\uc11c \uc2dc\uc791\ud574\uc11c \uc784\uc758\uc758 \uc790\ub8cc \uac1c\uc218 N, \uc784\uc758\uc758 \uad50\ucc28 \uac80\uc99d \ud69f\uc218 k\uc5d0\uc11c \uc791\ub3d9\ud558\ub294 \ubc29\ubc95\uc744 \uad6c\uc131\ud574\ub098\uac00\uace0\uc790 \ud55c\ub2e4.)<\/p>\n<p>\ub530\ub77c\uc11c \uccab \ubc88\uc9f8 \ub370\uc774\ud130\ub294 <code>dat[-folds[[1]],]<\/code>\uc744 train set\uc73c\ub85c, <code>dat[folds[[1]],]<\/code>\uc744 validation set\uc73c\ub85c \uad6c\uc131\ud558\uba74 \ub41c\ub2e4.<\/p>\n<p>\ud558\uc9c0\ub9cc \uc704\uc758 \ucf54\ub4dc\ub97c \ubcf4\uba74 \ubb54\uac00 \ub9e4\uc6b0 \uc911\ubcf5\ub418\ub294 \uc694\uc18c\uac00 \ub9ce\uc774 \ub4e4\uc5b4\uac00 \uc788\uc74c\uc744 \ud655\uc778\ud560 \uc218 \uc788\ub2e4. <code>list()<\/code> \uc548\uc5d0 <code>isamp<\/code>\uac00 5\ubc88 \ubc18\ubcf5\ub418\uace0 \uc788\ub2e4. 
\uc22b\uc790 <code>1,20,21,40,41,60,61,80,81,100<\/code>\uc5d0\uc11c\ub3c4 \uc5f0\uc18d\ub418\ub294 \uc790\uc5f0\uc218\ub294 \ub2e4\uc18c \uc911\ubcf5\uc801\uc778 \uc694\uc18c\uc774\ub2e4.<\/p>\n<p><code>isamp<\/code>\uc758 \uc6d0\uc18c\ub97c \uc801\ub2f9\ud558\uac8c \ub098\ub220\uc8fc\ub294 R\uc758 \ud568\uc218\ub97c \uc0ac\uc6a9\ud558\uba74 \ub2e4\uc74c\uacfc \uac19\uc774 \uc4f8 \uc218 \uc788\ub2e4.<\/p>\n<pre><code class=\"r\">folds &lt;- split(isamp, cut(1:100, \r\n                          breaks=100*c(0,0.2, 0.4, 0.6, 0.8, 1.0),\r\n                          include.lowest=TRUE))\r\nfolds\r\n<\/code><\/pre>\n<pre>## $`[0,20]`\r\n##  [1]  6 92 28 80 54 66 55 11 69 32 17 75 87 84 45 25 68 21 86 85\r\n## \r\n## $`(20,40]`\r\n##  [1] 31 33 94 99 29 27 88 89 41 15 64 42 18 72 12 50 26  3 37 44\r\n## \r\n## $`(40,60]`\r\n##  [1]  39  76  81  14  48  38  97 100  46  63  90  43   5  49  52  51  56  59  78  23\r\n## \r\n## $`(60,80]`\r\n##  [1] 24 93 22 20 67  8 53 61 74 10  1 96 62  9 79 70 65 34  2 40\r\n## \r\n## $`(80,100]`\r\n##  [1] 73 77 60 47 91 58 71  4 36 57 19 35 95 82 83 98 13  7 16 30\r\n<\/pre>\n<p><code>split(x, f)<\/code>\ub294 \ubca1\ud130 <code>x<\/code>\uc758 \ub0b4\uc6a9\uc744 \ud329\ud130 <code>f<\/code>\uc5d0 \ub530\ub77c \ub098\ub220\uc900\ub2e4. 
<code>?split<\/code>\uc744 \ud574\ubcf4\uba74 <code>Divide into Groups and Reassemble<\/code>\uc774\ub77c\uace0 \uc124\uba85\uc744 \ud574\ub193\uc558\ub2e4.<\/p>\n<p>\ub530\ub77c\uc11c \uc6b0\uc120 \ub2e4\uc74c\uacfc \uac19\uc774 \ud574\ubcfc \uc218 \uc788\ub2e4.<\/p>\n<pre><code class=\"r\">folds &lt;- split(isamp, c(rep(1,20), rep(2,20), rep(3,20), rep(4,20), rep(5,20)))\r\nfolds\r\n<\/code><\/pre>\n<pre>## $`1`\r\n##  [1]  6 92 28 80 54 66 55 11 69 32 17 75 87 84 45 25 68 21 86 85\r\n## \r\n## $`2`\r\n##  [1] 31 33 94 99 29 27 88 89 41 15 64 42 18 72 12 50 26  3 37 44\r\n## \r\n## $`3`\r\n##  [1]  39  76  81  14  48  38  97 100  46  63  90  43   5  49  52  51  56  59  78  23\r\n## \r\n## $`4`\r\n##  [1] 24 93 22 20 67  8 53 61 74 10  1 96 62  9 79 70 65 34  2 40\r\n## \r\n## $`5`\r\n##  [1] 73 77 60 47 91 58 71  4 36 57 19 35 95 82 83 98 13  7 16 30\r\n<\/pre>\n<p><code>c(rep(1,20), rep(2,20), rep(3,20), rep(4,20), rep(5,20))<\/code>\uc740 \ucd1d \uc6d0\uc18c 100\uac1c\ub97c 20\uac1c\uc529 \uc9d1\ub2e8\uc73c\ub85c \ub098\ub204\ub294 \ud45c\uc2dd\uc774\ub77c\uace0 \uc0dd\uac01\ud560 \uc218 \uc788\ub2e4. \uc704\uc758 <code>cut()<\/code>\uc740 \ube44\uc2b7\ud55c \ubc29\ubc95\uc774\uc9c0\ub9cc \ub204\uc801\ube44\uc728\ub85c \ub098\ud0c0\ub0bc \uc218 \uc788\ub2e4\ub294 \uc7a5\uc810\uc774 \uc788\ub2e4.<\/p>\n<p>\uc704\uc758 \ubc29\ubc95\uc740 \uc790\ub8cc\uc758 \ud06c\uae30\ub97c \uc77c\ubc18\ud654\ud560 \uc218 \uc788\ub2e4\ub294 \uc7a5\uc810\uc774 \uc788\ub2e4. \ub2e4\uc74c\uc758 \uc608\ub97c \ubcf4\uc790. 
<code>N<\/code>\uc5d0 \uc5b4\ub5a4 \uc218\ub97c \uc9d1\uc5b4 \ub123\uc5b4\ub3c4 \uc798 \uc791\ub3d9\ud55c\ub2e4!<\/p>\n<pre><code class=\"r\">N=100\r\nfolds1 &lt;- split(isamp, cut(1:N, \r\n                          breaks=N*c(0,0.2, 0.4, 0.6, 0.8, 1.0),\r\n                          include.lowest=TRUE))\r\nfolds2 &lt;- split(isamp, c(rep(1,N*0.2), rep(2,N*0.2), rep(3,N*0.2), \r\n                         rep(4,N*0.2), rep(5,N*0.2)))\r\nfolds1\r\n<\/code><\/pre>\n<pre>## $`[0,20]`\r\n##  [1]  6 92 28 80 54 66 55 11 69 32 17 75 87 84 45 25 68 21 86 85\r\n## \r\n## $`(20,40]`\r\n##  [1] 31 33 94 99 29 27 88 89 41 15 64 42 18 72 12 50 26  3 37 44\r\n## \r\n## $`(40,60]`\r\n##  [1]  39  76  81  14  48  38  97 100  46  63  90  43   5  49  52  51  56  59  78  23\r\n## \r\n## $`(60,80]`\r\n##  [1] 24 93 22 20 67  8 53 61 74 10  1 96 62  9 79 70 65 34  2 40\r\n## \r\n## $`(80,100]`\r\n##  [1] 73 77 60 47 91 58 71  4 36 57 19 35 95 82 83 98 13  7 16 30\r\n<\/pre>\n<pre><code class=\"r\">names(folds1) = NULL\r\nnames(folds2) = NULL\r\nall.equal(folds1, folds2)\r\n<\/code><\/pre>\n<pre>## [1] TRUE\r\n<\/pre>\n<p>\ub450 \ubc88\uc9f8 \uad00\ubb38\uc740 <code>k<\/code>\ub97c \uc77c\ubc18\ud654\ud558\ub294 \uac83\uc774\ub2e4.<\/p>\n<p><code>k<\/code>\ub97c \uc77c\ubc18\ud654\ud558\uae30 \uc704\ud574\uc11c\ub294 <code>c(0, 0.2, 0.4, 0.6, 0.8, 1.0)<\/code> \ubd80\ubd84\uc744 \uc801\ub2f9\ud788 \uace0\uccd0\uc57c \ud560 \uac83\uc774\ub2e4. \uc608\ub97c \ub4e4\uc5b4 <code>k=2<\/code>\ub77c\uba74 <code>c(0, 0.5, 1)<\/code>\uc774 \ub418\uc5b4\uc57c \ud558\uba70, <code>k=4<\/code>\ub77c\uba74 <code>c(0, 0.25, 0.5, 0.75, 1)<\/code>\uc774 \ub418\uc5b4\uc57c \ud55c\ub2e4. \uacf5\ud1b5\uc810\uc774 \ubcf4\uc774\ub294\uac00? \uc2dc\uc791\uc740 \uc5b8\uc81c\ub098 <code>0<\/code>\uc774\uace0, \ub9c8\uc9c0\ub9c9\uc740 \uc5b8\uc81c\ub098 <code>1<\/code>\uc774\ub2e4. 
\uadf8\ub9ac\uace0 <code>k<\/code>\uc5d0 \ub530\ub77c \uae38\uc774\uac00 \ub2ec\ub77c\uc838\uc57c \ud55c\ub2e4.<\/p>\n<p>\uc774\ub7f0 \uacbd\uc6b0 R\uc758 <code>seq<\/code>\ub97c \uc0ac\uc6a9\ud558\uc5ec <code>seq(from=0, to=1, length.out=k+1)<\/code>\ub85c \uc4f8 \uc218 \uc788\ub2e4. \ub530\ub77c\uc11c \ub2e4\uc74c\uacfc \uac19\uc740 \uacb0\uacfc\ub97c \uc5bb\ub294\ub2e4.<\/p>\n<pre><code class=\"r\">N = 100; k = 10\r\nisamp &lt;- sample(N)\r\nfolds &lt;- split(isamp, cut(1:N,\r\n                          breaks=N*seq(0, 1, length.out=k+1),\r\n                          include.lowest=TRUE))\r\n#folds\r\n<\/code><\/pre>\n<p><code>N<\/code>\uc5d0 <code>101<\/code>\ub97c \ub123\uc5b4\ub3c4, <code>k<\/code>\uc5d0 <code>17<\/code>\ub97c \ub123\uc5b4\ub3c4 \uc798 \uc791\ub3d9\ud568\uc744 \ud655\uc778\ud560 \uc218 \uc788\uc744 \uac83\uc774\ub2e4! \uacb0\uad6d \uc77c\ubc18\uc801\uc778 <code>N<\/code>\uacfc <code>k<\/code>\uc5d0 \ub300\ud574 <code>k<\/code>-\uacb9 \uad50\ucc28 \uac80\uc99d \ub370\uc774\ud130 \uc14b\uc744 \uad6c\uc131\ud560 \uc218 \uc788\ub294 \ubc29\ubc95\uc744 \uac1c\ubc1c\ud558\uc600\ub2e4!<\/p>\n<pre><code class=\"r\">N = 101; k = 17\r\nisamp &lt;- sample(N)\r\nfolds &lt;- split(isamp, cut(1:N,\r\n                          breaks=N*seq(0, 1, length.out=k+1),\r\n                          include.lowest=TRUE))\r\nfolds\r\n<\/code><\/pre>\n<pre>## $`[0,5.94]`\r\n## [1] 26 98 86  4 17\r\n## \r\n## $`(5.94,11.9]`\r\n## [1] 83 13 56 10 81 91\r\n## \r\n## $`(11.9,17.8]`\r\n## [1] 18  1 88 62 16 78\r\n## \r\n## $`(17.8,23.8]`\r\n## [1]  5 49 72 47 59 82\r\n## \r\n## $`(23.8,29.7]`\r\n## [1] 38 24 21 63 68 67\r\n## \r\n## $`(29.7,35.6]`\r\n## [1] 41 71 53 61 23 64\r\n## \r\n## $`(35.6,41.6]`\r\n## [1] 27 69 85 76 30 54\r\n## \r\n## $`(41.6,47.5]`\r\n## [1] 11 33 75 52 84 92\r\n## \r\n## $`(47.5,53.5]`\r\n## [1] 89 73 36 93 25 15\r\n## \r\n## $`(53.5,59.4]`\r\n## [1] 94 55 42 80 32 44\r\n## \r\n## $`(59.4,65.4]`\r\n## [1]  3 45 66 50 22 90\r\n## \r\n## 
$`(65.4,71.3]`\r\n## [1] 96 40 20 43 28 74\r\n## \r\n## $`(71.3,77.2]`\r\n## [1] 79 77 97  7 35 34\r\n## \r\n## $`(77.2,83.2]`\r\n## [1]  12  19  99  29   2 100\r\n## \r\n## $`(83.2,89.1]`\r\n## [1]  70  48  60  51 101  65\r\n## \r\n## $`(89.1,95.1]`\r\n## [1] 37 14 87 46 39 57\r\n## \r\n## $`(95.1,101]`\r\n## [1]  8 58  9 31 95  6\r\n<\/pre>\n<pre><code class=\"r\">sapply(folds, length)\r\n<\/code><\/pre>\n<pre>##    [0,5.94] (5.94,11.9] (11.9,17.8] (17.8,23.8] (23.8,29.7] (29.7,35.6] (35.6,41.6] (41.6,47.5] \r\n##           5           6           6           6           6           6           6           6 \r\n## (47.5,53.5] (53.5,59.4] (59.4,65.4] (65.4,71.3] (71.3,77.2] (77.2,83.2] (83.2,89.1] (89.1,95.1] \r\n##           6           6           6           6           6           6           6           6 \r\n##  (95.1,101] \r\n##           6\r\n<\/pre>\n<pre><code class=\"r\">lelem &lt;- sapply(folds, length)\r\nsum(lelem)\r\n<\/code><\/pre>\n<pre>## [1] 101\r\n<\/pre>\n<h2><code>caret<\/code>\uc758 <code>createFolds<\/code><\/h2>\n<p><code>caret<\/code>\uc758 <code>createFolds<\/code>\ub294 \uc6b0\ub9ac\uac00 \ud588\ub358 \ubc14\ub85c \uadf8 \uc791\uc5c5\uc744 \ud574\uc8fc\ub294 \ud568\uc218\uc774\ub2e4.<\/p>\n<pre><code class=\"r\">folds &lt;- caret::createFolds(y=1:101, k=7)\r\nfolds\r\n<\/code><\/pre>\n<pre>## $Fold1\r\n##  [1]  1  4 12 25 33 36 42 53 54 57 76 79 81 86\r\n## \r\n## $Fold2\r\n##  [1] 14 15 17 27 29 38 52 55 62 63 78 93 97\r\n## \r\n## $Fold3\r\n##  [1]   2  11  13  20  40  44  46  49  59  61  65  75  84  90  92 100\r\n## \r\n## $Fold4\r\n##  [1]   3   8   9  23  32  34  35  51  58  71  73  91  94  98 101\r\n## \r\n## $Fold5\r\n##  [1]  5 19 21 24 31 41 45 48 64 66 67 85 87 89 95\r\n## \r\n## $Fold6\r\n##  [1]  6  7 22 26 30 37 47 50 56 60 70 72 77 82 83 88\r\n## \r\n## $Fold7\r\n##  [1] 10 16 18 28 39 43 68 69 74 80 96 99\r\n<\/pre>\n<pre><code class=\"r\">sapply(folds, length)\r\n<\/code><\/pre>\n<pre>## Fold1 Fold2 Fold3 Fold4 Fold5 
Fold6 Fold7 \r\n##    14    13    16    15    15    16    12\r\n<\/pre>\n<p>\ud478\ud558\ud558! \uc6b0\ub9ac\uac00 \ub9cc\ub4e4\uc5c8\ub358 \ud568\uc218\ubcf4\ub2e4 \uc57d\uac04 \ubabb\ud55c \uac83 \uac19\ub2e4.<\/p>\n<pre><code class=\"r\">lelem &lt;- sapply(folds, length)\r\nsum(lelem)\r\n<\/code><\/pre>\n<pre>## [1] 101\r\n<\/pre>\n<h2><code>caret::createFolds<\/code>\uc758 \uc7a5\uc810<\/h2>\n<p>\ud558\uc9c0\ub9cc <code>createFolds<\/code>\ub294 target label\uc744 \uade0\ub4f1\ud558\uac8c \ubc30\ubd84\ud558\ub294 \uae30\ub2a5\uc774 \uc788\ub2e4. \uc774\uac8c \ubb34\uc2a8 \ub9d0\uc778\uac00?<\/p>\n<pre><code class=\"r\">library(caret)\r\ny &lt;- sample(3, 100, replace=TRUE, prob=c(0.2, 0.3, 0.5))\r\nfolds &lt;- createFolds(y, k=3)\r\nfolds\r\n<\/code><\/pre>\n<pre>## $Fold1\r\n##  [1]  6  7 10 15 16 21 22 23 24 27 29 30 32 46 50 51 52 53 59 63 64 65 66 68 69 75 76 78 81 86 89 92\r\n## [33] 94 96\r\n## \r\n## $Fold2\r\n##  [1]   2   5   9  11  14  17  18  20  34  35  36  37  38  40  41  42  43  44  47  48  49  55  60  61\r\n## [25]  62  72  83  84  85  87  93  95  98 100\r\n## \r\n## $Fold3\r\n##  [1]  1  3  4  8 12 13 19 25 26 28 31 33 39 45 54 56 57 58 67 70 71 73 74 77 79 80 82 88 90 91 97 99\r\n<\/pre>\n<p>\uc704\uc5d0\uc11c <code>y<\/code>\uc5d0\ub294 <code>1<\/code> \ub610\ub294 <code>2<\/code> \ub610\ub294 <code>3<\/code>\uc774 \uc800\uc7a5\ub418\uc5b4 \uc788\uc73c\uba70, \uadf8 \ube44\uc728\uc740 0.2, 0.3, 0.5\uc640 \ube44\uc2b7\ud558\ub2e4.<\/p>\n<pre><code class=\"r\">table(y)\r\n<\/code><\/pre>\n<pre>## y\r\n##  1  2  3 \r\n## 22 34 44\r\n<\/pre>\n<p>\uc6b0\ub9ac\uac00 k-\uacb9 \uad50\ucc28 \uac80\uc99d \ub370\uc774\ud130\ub97c \uad6c\uc131\ud560 \ub54c, target label\uc758 \ube44\uc728\uc774 \uac01 fold\ub9c8\ub2e4 \uc9c0\ub098\uce58\uac8c \ub2e4\ub974\uac8c \ub418\uba74 \ud559\uc2b5\uc5d0 \uc774\ub7f0 imbalance\uac00 \ubc18\uc601\ub420 \uc218 \uc788\ub2e4. 
\uc608\ub97c \ub4e4\uc5b4 y\ub97c \uc608\uce21\ud574\uc57c \ud558\ub294\ub370, train set\uc5d0 \ubaa8\ub450 y=1\uc778 \uc0ac\ub840\ub9cc \ub4e4\uc5b4\uac00 \uc788\ub294 \uadf9\ub2e8\uc801\uc778 \uacbd\uc6b0\ub97c \uc0dd\uac01\ud574\ubcf4\uba74 imbalance\uc758 \ubb38\uc81c\ub97c \uc774\ud574\ud560 \uc218 \uc788\uc744 \uac83\uc774\ub2e4.<\/p>\n<p><code>caret::createFolds<\/code>\ub294 \uc774\ub807\uac8c target label\uc758 \ube44\uc728\uc744 \uc758\ub3c4\uc801\uc73c\ub85c \ub9de\ucdb0\uc900\ub2e4. \ub2e4\uc74c\uc5d0\uc11c \ud655\uc778\ud560 \uc218 \uc788\ub2e4.<\/p>\n<pre><code class=\"r\">library(magrittr)\r\ntable(y[-folds[[1]]]) %&gt;% prop.table %&gt;% round(1)\r\n<\/code><\/pre>\n<pre>## \r\n##   1   2   3 \r\n## 0.2 0.3 0.4\r\n<\/pre>\n<pre><code class=\"r\">table(y[-folds[[2]]]) %&gt;% prop.table %&gt;% round(1)\r\n<\/code><\/pre>\n<pre>## \r\n##   1   2   3 \r\n## 0.2 0.3 0.4\r\n<\/pre>\n<pre><code class=\"r\">table(y[-folds[[3]]]) %&gt;% prop.table %&gt;% round(1)\r\n<\/code><\/pre>\n<pre>## \r\n##   1   2   3 \r\n## 0.2 0.4 0.4\r\n<\/pre>\n<p>\ubc18\uba74 \uc6b0\ub9ac\uac00 \uac1c\ubc1c\ud55c \ud568\uc218\uc5d0\ub294 \uadf8\ub7f0 \uae30\ub2a5\uc774 \uc5c6\ub2e4.<\/p>\n<pre><code class=\"r\">N = 100; k = 3\r\nisamp &lt;- sample(N)\r\nfolds &lt;- split(isamp, cut(1:N,\r\n                          breaks=N*seq(0, 1, length.out=k+1),\r\n                          include.lowest=TRUE))\r\nsapply(folds, length)\r\n<\/code><\/pre>\n<pre>##    [0,33.3] (33.3,66.7]  (66.7,100] \r\n##          33          33          34\r\n<\/pre>\n<pre><code class=\"r\">table(y[-folds[[1]]]) %&gt;% prop.table %&gt;% round(1)\r\n<\/code><\/pre>\n<pre>## \r\n##   1   2   3 \r\n## 0.2 0.3 0.5\r\n<\/pre>\n<pre><code class=\"r\">table(y[-folds[[2]]]) %&gt;% prop.table %&gt;% round(1)\r\n<\/code><\/pre>\n<pre>## \r\n##   1   2   3 \r\n## 0.2 0.3 0.4\r\n<\/pre>\n<pre><code class=\"r\">table(y[-folds[[3]]]) %&gt;% prop.table %&gt;% round(1)\r\n<\/code><\/pre>\n<pre>## \r\n##   1   2   3 
\r\n## 0.2 0.4 0.4\r\n<\/pre>\n<h2>\uacb0\ub860<\/h2>\n<p>k-\uacb9 \uad50\ucc28 \uac80\uc99d \ub370\uc774\ud130\ub97c \uad6c\uc131\ud558\uae30 \uc704\ud574 \ud544\uc694\ud55c \ud568\uc218\ub97c \uac1c\ubc1c\ud558\uc600\ub2e4.<\/p>\n<pre><code class=\"r\">createfolds = function(N, k) {\r\n  isamp &lt;- sample(N)\r\n  split(isamp, cut(1:N,\r\n                   breaks=N*seq(0, 1, length.out=k+1),\r\n                   include.lowest=TRUE))\r\n}\r\ncreatefolds(77,4)\r\n<\/code><\/pre>\n<pre>## $`[0,19.2]`\r\n##  [1] 26 43 60 34 70 24  2  7 64  9 52 16 14 74 57 59 10 69 42\r\n## \r\n## $`(19.2,38.5]`\r\n##  [1] 62 19 13 73 77 65  1 22 67  3 37 56 58 29  8 63 17 53  6\r\n## \r\n## $`(38.5,57.8]`\r\n##  [1] 41 66 36 76 44 11 21 15 49  4 61 27 48 38 32 54 51 39  5\r\n## \r\n## $`(57.8,77]`\r\n##  [1] 12 28 46 20 50 45 40 31 75 23 18 55 68 33 30 35 25 71 72 47\r\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>k-\uacb9 \uad50\ucc28 \uac80\uc99d(k-fold cross validation) \uad50\ucc28 \uac80\uc99d\uc740 \ubaa8\ud615\uc758 \uc131\ub2a5\uc744 \ud310\ub2e8\ud558\uae30 \uc704\ud574 \uc0ac\uc6a9\ud55c\ub2e4. \uc120\ud615 \ubaa8\ud615\uc5d0\uc11c \\(R^2\\) (\ub610\ub294 \\(adj-R^2\\) \ub098 AIC, BIC \ub4f1\uc740 \ubaa8\ub450 \ud2b9\uc815\ud55c \uac00\uc815\uc744 \uc131\ub9bd\ud560 \ub54c \uc77c\ubc18\ud654 \uc131\ub2a5\uc744 \ud310\ub2e8\ud558\uae30 \uc704\ud574 \uc0ac\uc6a9\ud560 \uc218 \uc788\uc9c0\ub9cc, \ubaa8\ud615\uc774 \ubcf5\uc7a1\ud558\uac70\ub098, \uc624\ucc28\ud56d\uc774 \uc815\uaddc\ubd84\ud3ec\ub97c \ub744\uc9c0 \uc54a\uac70\ub098 \ud558\ub294 \uc880 \ub354 \uc77c\ubc18\uc801\uc778 \uc0c1\ud669\uc5d0\uc11c\ub294 \ubaa8\ud615\uc758 \uc77c\ubc18\ud654 \uc131\ub2a5\uc744 \uc815\ud655\ud558\uac8c \ubc18\uc601\ud55c\ub2e4\uace0 \ub9d0\ud558\uae30 \ud798\ub4e4\uae30 \ub54c\ubb38\uc774\ub2e4. 
\uba3c\uc800 \ub2e4\uc74c\uacfc \uac19\uc740 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":2449,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[443,28],"tags":[389,445,444],"jetpack_featured_media_url":"http:\/\/ds.sumeun.org\/wp-content\/uploads\/2020\/04\/woven-g7d6979c2a_640.jpg","_links":{"self":[{"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts\/2156"}],"collection":[{"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2156"}],"version-history":[{"count":6,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts\/2156\/revisions"}],"predecessor-version":[{"id":2450,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts\/2156\/revisions\/2450"}],"wp:featuredmedia":[{"embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/media\/2449"}],"wp:attachment":[{"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2156"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2156"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2156"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}