{"id":2428,"date":"2022-02-03T11:11:21","date_gmt":"2022-02-03T02:11:21","guid":{"rendered":"http:\/\/ds.sumeun.org\/?p=2428"},"modified":"2022-02-03T13:46:34","modified_gmt":"2022-02-03T04:46:34","slug":"%ed%8c%8c%ec%9d%bc-%ec%9d%bd%ea%b8%b0%ec%97%90%ec%84%9c-bombyte-order-marks","status":"publish","type":"post","link":"http:\/\/ds.sumeun.org\/?p=2428","title":{"rendered":"\ud30c\uc77c \uc77d\uae30\uc5d0\uc11c BOM(Byte Order Marks)"},"content":{"rendered":"<p><a href=\"https:\/\/github.com\/kwhkim\/DAwR03a\/blob\/main\/%EC%84%9C%EC%9A%B8%EC%8B%9C%20%ED%95%9C%EA%B0%95%EA%B3%B5%EC%9B%90%20%EC%9D%B4%EC%9A%A9%EA%B0%9D%20%ED%98%84%ED%99%A9%20(2009_2013%EB%85%84).csv\">\ucca8\ubd80\ud30c\uc77c: \uc11c\uc6b8\uc2dc \ud55c\uac15\uacf5\uc6d0 \uc774\uc6a9\uac1d \ud604\ud669 (2009_2013\ub144).csv\u00a0<\/a><\/p>\n<p>\ucc45\uc5d0\ub294 BOM\uc5d0 \ub300\ud574 \uc790\uc138\ud558\uac8c \uc124\uba85\ud558\uc600\uc2b5\ub2c8\ub2e4.<\/p>\n<p>UTF-8-BOM\uc758 \uacbd\uc6b0 Notepad++\uc5d0\uc11c \uc778\ucf54\ub529\uc744 \ud655\uc778\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4\ub9cc,<\/p>\n<p>\ub610 \ubb50 \uad73\uc774 \uadf8\uac83\ub54c\ubb38\uc5d0 Notepad++\ub97c \ub2e4\uc6b4\ub85c\ub4dc \ubc1b\uc544 \uc124\uce58\ud560 \ud544\uc694\uac00&#8230; \uc5c6\uae34 \ud569\ub2c8\ub2e4\ub9cc,<\/p>\n<p>\ub610 \ub9c9\uc0c1 \uc778\ud130\ub137 \uac80\uc0c9\uc744 \ud574\ubcf4\uba74 \ub2e4\ub978 \ubc29\ubc95\uc774 \uc27d\uac8c \ub5a0\uc624\ub974\uc9c0 \uc54a\ub124\uc694.<\/p>\n<p>\uadf8\ub798\uc11c \ub9cc\ub4e4\uc5b4\ubd24\uc2b5\ub2c8\ub2e4. 
R\uc5d0\uc11c BOM \uc874\uc7ac\ub97c \ud655\uc778\ud558\ub294 \ubc29\ubc95, \uadf8\ub9ac\uace0 \ud30c\uc77c\uc744 \uc77d\ub294 \ubc29\ubc95\uc744 \uc81c\uc548\ud569\ub2c8\ub2e4.<\/p>\n<blockquote>\n<pre># BOM \ub610\ub294 UTF8-Signature \ud655\uc778 \ubc29\ubc95\r\nf &lt;- file(\"\uc11c\uc6b8\uc2dc \ud55c\uac15\uacf5\uc6d0 \uc774\uc6a9\uac1d \ud604\ud669 (2009_2013\ub144).csv\", \"rb\")\r\nmarks &lt;- readBin(f, \"raw\", n=10)\r\n\r\n# \ucd9c\ucc98: https:\/\/en.wikipedia.org\/wiki\/Byte_order_mark\r\nBOM_UTF8 &lt;- as.raw(c(0xef, 0xbb, 0xbf))\r\nBOM_UTF16BE &lt;- as.raw(c(0xfe, 0xff))\r\nBOM_UTF16LE &lt;- as.raw(c(0xff, 0xfe))\r\nBOM_UTF32BE &lt;- as.raw(c(0x00, 0x00, 0xfe, 0xff))\r\nBOM_UTF32LE &lt;- as.raw(c(0xff, 0xfe, 0x00, 0x00))\r\nBOM_UTF7 &lt;- as.raw(c(0x2b, 0x2f, 0x76))\r\nBOM_UTF1 &lt;- as.raw(c(0xf7, 0x64, 0x4c))\r\nBOM_UTF_EBCDIC &lt;- as.raw(c(0xdd, 0x73, 0x66, 0x73))\r\nBOM_SCSU &lt;- as.raw(c(0x0e, 0xfe, 0xff))\r\nBOM_BOCU1 &lt;- as.raw(c(0xfb, 0xee, 0x28))\r\nBOM_GB18030 &lt;- as.raw(c(0x84, 0x31, 0x95, 0x33))\r\nBOMs = list(\"UTF8\"=BOM_UTF8, \r\n\"UTF16BE\"=BOM_UTF16BE, \"UTF16LE\"=BOM_UTF16LE, \r\n\"UTF32BE\"=BOM_UTF32BE, \"UTF32LE\"=BOM_UTF32LE,\r\n\"UTF7\"=BOM_UTF7, \"UTF1\"=BOM_UTF1, \r\n\"UTF-EBCDIC\"=BOM_UTF_EBCDIC, \r\n\"SCSU\"=BOM_SCSU, \r\n\"BOCU-1\"=BOM_BOCU1, \r\n\"GB-18030\"=BOM_GB18030)\r\n\r\ncheck_marks = function(BOM) {\r\nif (all(marks[1:length(BOM)] == BOM)) return(TRUE) else return(FALSE)\r\n}\r\n\r\nnames(BOMs)[sapply(BOMs, check_marks)]\r\n# UTF8<\/pre>\n<\/blockquote>\n<p>\ucca8\ubd80\ud30c\uc77c &#8220;\uc11c\uc6b8\uc2dc \ud55c\uac15\uacf5\uc6d0 \uc774\uc6a9\uac1d \ud604\ud669 (2009_2013\ub144).csv&#8221;\ub294 \uc778\ucf54\ub529\uc774 UTF-8-BOM \ub610\ub294 UTF-8-Signature\uc785\ub2c8\ub2e4(\uccab \uae00\uc790\uac00 \uc720\ub2c8\ucf54\ub4dc 0xfeff\uc774\uc8e0). 
\uc704\uc758 \ucf54\ub4dc\ub294 \ud30c\uc77c\uc758 \uccab \ubd80\ubd84\uc744 \uc77d\uc5b4\uc11c BOM\uacfc \ube44\uad50\ud574\uc11c \uc778\ucf54\ub529\uc744 \ud655\uc778\ud558\uc8e0. \ub9cc\uc57d BOM\uc774 \uc5c6\ub2e4\uba74 \uc5c6\ub2e4\uace0 \ub098\uc635\ub2c8\ub2e4.<\/p>\n<p>\uc774\uc81c \ud568\uc218\ub85c \ub9cc\ub4e4\uc5b4\ubd05\ub2c8\ub2e4.<\/p>\n<blockquote>\n<pre># ====\r\n# \ud568\uc218\ub85c \ub9cc\ub4e4\uae30\r\n# checkBOM(filename)\r\n\r\ncheckBOM = function(filename) {\r\nmarks &lt;- readBin(filename, \"raw\", n=4)\r\n\r\nBOM_UTF8 &lt;- as.raw(c(0xef, 0xbb, 0xbf))\r\nBOM_UTF16BE &lt;- as.raw(c(0xfe, 0xff))\r\nBOM_UTF16LE &lt;- as.raw(c(0xff, 0xfe))\r\nBOM_UTF32BE &lt;- as.raw(c(0x00, 0x00, 0xfe, 0xff))\r\nBOM_UTF32LE &lt;- as.raw(c(0xff, 0xfe, 0x00, 0x00))\r\nBOM_UTF7 &lt;- as.raw(c(0x2b, 0x2f, 0x76))\r\nBOM_UTF1 &lt;- as.raw(c(0xf7, 0x64, 0x4c))\r\nBOM_UTF_EBCDIC &lt;- as.raw(c(0xdd, 0x73, 0x66, 0x73))\r\nBOM_SCSU &lt;- as.raw(c(0x0e, 0xfe, 0xff))\r\nBOM_BOCU1 &lt;- as.raw(c(0xfb, 0xee, 0x28))\r\nBOM_GB18030 &lt;- as.raw(c(0x84, 0x31, 0x95, 0x33))\r\nBOMs = list(\"UTF8\"=BOM_UTF8, \r\n\"UTF16BE\"=BOM_UTF16BE, \"UTF16LE\"=BOM_UTF16LE, \r\n\"UTF32BE\"=BOM_UTF32BE, \"UTF32LE\"=BOM_UTF32LE,\r\n\"UTF7\"=BOM_UTF7, \"UTF1\"=BOM_UTF1, \r\n\"UTF-EBCDIC\"=BOM_UTF_EBCDIC, \r\n\"SCSU\"=BOM_SCSU, \r\n\"BOCU-1\"=BOM_BOCU1, \r\n\"GB-18030\"=BOM_GB18030)\r\n\r\ncheck_marks = function(BOM) {\r\nif (all(marks[1:length(BOM)] == BOM)) return(TRUE) else return(FALSE)\r\n}\r\n\r\nres = names(BOMs)[sapply(BOMs, check_marks)]\r\nif (length(res)&gt;0) {\r\ncon = file(filename, \"rb\")\r\nreadBin(con, \"raw\", n = length(BOMs[[res]]))\r\nreturn(list(BOM=res, con=con))\r\n} else {\r\nreturn(list(BOM=\"\", con=file(filename, \"rb\")))\r\n}\r\n\r\n}<\/pre>\n<\/blockquote>\n<p>\ubb38\uc81c: \uc704\uc758 \ud568\uc218\ub97c \uc5b4\ub5bb\uac8c \uc368\uc57c \ud560\uae4c\uc694? 
(\ud78c\ud2b8 : checkBOM(filename))<\/p>\n<p>&nbsp;<\/p>\n<p>## \ucc38\uace0\uc790\ub8cc<\/p>\n<p>* <a href=\"https:\/\/stackoverflow.com\/questions\/39593637\/dealing-with-byte-order-mark-bom-in-r\">https:\/\/stackoverflow.com\/questions\/39593637\/dealing-with-byte-order-mark-bom-in-r<\/a><br \/>\n* <a href=\"https:\/\/118k.tistory.com\/863\">https:\/\/118k.tistory.com\/863<\/a><br \/>\n* <a href=\"https:\/\/github.com\/tidyverse\/readr\/issues\/500\">https:\/\/github.com\/tidyverse\/readr\/issues\/500<\/a><br \/>\n&#8211; readr\uc740 BOM\uc744 \ud574\uacb0\ud55c \uac78\ub85c \uc544\ub294\ub370 \uc9c0\uae08 \ub2e4\uc2dc locale(encoding = &#8220;UTF-8-BOM&#8221;) \ud574\ubcf4\ub2c8 \uc5ec\uc804\ud788 \ubabb \uc77d\uace0 \uc788\ub124\uc694.<\/p>\n<p>&nbsp;<\/p>\n<p>\ucc38\uace0\ub3c4\uc11c&gt; R\ub85c \ud558\ub294 \ube45\ub370\uc774\ud130 \ubd84\uc11d: \ub370\uc774\ud130 \uc804\ucc98\ub9ac\uc640 \uc2dc\uac01\ud654<\/p>\n<p><img loading=\"lazy\" class=\"wp-image-2373 alignleft\" src=\"http:\/\/ds.sumeun.org\/wp-content\/uploads\/2022\/01\/\ud45c\uc9c0_\uc378\ub124\uc77c.png\" alt=\"\" width=\"108\" height=\"154\" \/> <a href=\"https:\/\/www.aladin.co.kr\/shop\/wproduct.aspx?ItemId=286608049\">\u00a0\uc54c\ub77c\ub518<\/a><\/p>\n<p><a href=\"http:\/\/www.yes24.com\/Product\/Goods\/106156560\">\uc608\uc2a424<\/a><\/p>\n<p><a href=\"http:\/\/www.kyobobook.co.kr\/product\/detailViewKor.laf?ejkGb=KOR&amp;mallGb=KOR&amp;barcode=9791196014445&amp;orderClick=LAG&amp;Kc=\">\uad50\ubcf4\ubb38\uace0<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\ucca8\ubd80\ud30c\uc77c: \uc11c\uc6b8\uc2dc \ud55c\uac15\uacf5\uc6d0 \uc774\uc6a9\uac1d \ud604\ud669 (2009_2013\ub144).csv\u00a0 \ucc45\uc5d0\ub294 BOM\uc5d0 \ub300\ud574 \uc790\uc138\ud558\uac8c \uc124\uba85\ud558\uc600\uc2b5\ub2c8\ub2e4. 
UTF-8-BOM\uc758 \uacbd\uc6b0 Notepad++\uc5d0\uc11c \uc778\ucf54\ub529\uc744 \ud655\uc778\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4\ub9cc, \ub610 \ubb50 \uad73\uc774 \uadf8\uac83\ub54c\ubb38\uc5d0 Notepad++\ub97c \ub2e4\uc6b4\ub85c\ub4dc \ubc1b\uc544 \uc124\uce58\ud560 \ud544\uc694\uac00&#8230; \uc5c6\uae34 \ud569\ub2c8\ub2e4\ub9cc, \ub610 \ub9c9\uc0c1 \uc778\ud130\ub137 \uac80\uc0c9\uc744 \ud574\ubcf4\uba74 \ub2e4\ub978 \ubc29\ubc95\uc774 \uc27d\uac8c \ub5a0\uc624\ub974\uc9c0 \uc54a\ub124\uc694. \uadf8\ub798\uc11c \ub9cc\ub4e4\uc5b4\ubd24\uc2b5\ub2c8\ub2e4. R\uc5d0\uc11c BOM \uc874\uc7ac\ub97c \ud655\uc778\ud558\ub294 \ubc29\ubc95, \uadf8\ub9ac\uace0 \ud30c\uc77c\uc744 \uc77d\ub294 \ubc29\ubc95\uc744 \uc81c\uc548\ud569\ub2c8\ub2e4. # BOM \ub610\ub294 UTF8-Signature [&hellip;]<\/p>\n","protected":false},"author":3225,"featured_media":2432,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[96,28,162,30,209,195,179,289],"tags":[147,20,22,181],"jetpack_featured_media_url":"http:\/\/ds.sumeun.org\/wp-content\/uploads\/2022\/02\/BOMs2.png","_links":{"self":[{"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts\/2428"}],"collection":[{"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/users\/3225"}],"replies":[{"embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2428"}],"version-history":[{"count":3,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts\/2428\/revisions"}],"predecessor-version":[{"id":2433,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts\/2428\/revisions\/2433"}],"wp:featuredmedia":[{"embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/media\/2432"}],"wp:attachment":[{"href":
"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2428"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2428"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2428"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}