{"id":2384,"date":"2022-01-14T01:54:09","date_gmt":"2022-01-13T16:54:09","guid":{"rendered":"http:\/\/ds.sumeun.org\/?p=2384"},"modified":"2022-01-30T04:53:55","modified_gmt":"2022-01-29T19:53:55","slug":"html_table%ea%b3%bc-invalid-multibyte-string","status":"publish","type":"post","link":"http:\/\/ds.sumeun.org\/?p=2384","title":{"rendered":"html_table()\uacfc invalid multibyte string"},"content":{"rendered":"<blockquote>\n<pre>library(rvest)<\/pre>\n<\/blockquote>\n<p>rvest \ud328\ud0a4\uc9c0\ub294 \uc6f9\ud06c\ub864\ub9c1\uc5d0 \uc694\uae34\ud558\uac8c \uc4f0\uc778\ub2e4.<\/p>\n<p>\uadf8\ub7f0\ub370 \uc774 \ud328\ud0a4\uc9c0\ub85c \ud55c\uae00 \ud45c\ub97c \ud06c\ub864\ub9c1\ud560 \ub54c\uc5d0\ub294 \uc54c\ub824\uc9c4 \ubb38\uc81c\uac00 \uc788\ub2e4.<\/p>\n<p>http:\/\/fow.kr\/find\/af23c4ee03bd666 \uc758 \ub370\uc774\ud130\ub97c \ud06c\ub864\ub9c1\ud574\ubcf4\uc790.<\/p>\n<blockquote>\n<pre>url<span class=\"token operator\">=<\/span><span class=\"token string\"><span class=\"hljs-string\">'http:\/\/fow.kr\/find\/af23c4ee03bd666'<\/span><\/span> \r\nhtml<span class=\"token operator\">=<\/span>read_html<span class=\"token punctuation\">(<\/span>url<span class=\"token punctuation\">,<\/span>encoding<span class=\"token operator\">=<\/span><span class=\"token string\"><span class=\"hljs-string\">'UTF-8'<\/span><\/span><span class=\"token punctuation\">)<\/span> \r\ntable<span class=\"token operator\">=<\/span>html <span class=\"token percent-operator operator\">%&gt;%<\/span> html_nodes<span class=\"token punctuation\">(<\/span><span class=\"token string\"><span class=\"hljs-string\">\".tablesorter\"<\/span><\/span><span class=\"token punctuation\">)<\/span> <span class=\"token percent-operator operator\">%&gt;%<\/span> html_table<span class=\"token punctuation\">(<\/span><span class=\"token punctuation\">)\r\n<\/span><\/pre>\n<\/blockquote>\n<p>\uc708\ub3c4\uc6b0\uc5d0\uc11c invalid multibyte string \uc624\ub958\uac00 \ubc1c\uc0dd\ud55c\ub2e4.\u00a0 \uc774\ub97c 
\ud574\uacb0\ud558\uae30 \uc704\ud574 locale\ub97c \uc7a0\uae50 \ubcc0\uacbd\ud558\uac70\ub098(<strong>Sys.setlocale(&#8220;LC_ALL&#8221;, &#8220;English&#8221;)<\/strong>),\u00a0<strong>RCurl::getURL()<\/strong>\uc640\u00a0<strong>XML::readHTMLTable()<\/strong>\uc744 \uad8c\uc720\ud558\uae30\ub3c4 \ud55c\ub2e4. \ud558\uc9c0\ub9cc <a href=\"https:\/\/support.rstudio.com\/hc\/en-us\/articles\/200532197-Character-Encoding-in-the-RStudio-IDE\">RStudio\uc5d0\uc11c\ub294 RStudio\ub97c \uc0ac\uc6a9\ud558\ub294 \ub3c4\uc911\uc5d0 locale\uc744 \ubcc0\uacbd\ud558\uc9c0 \ub9d0\ub77c\uace0 \uad8c\uc720\ud558\uace0 \uc788\ub2e4<\/a>.<\/p>\n<p>\uc5ec\uae30\uc11c\ub294 \uc880 \ub354 \uac04\ub2e8\ud55c \ubc29\ubc95\uc744 \uc81c\uc2dc\ud55c\ub2e4.<\/p>\n<blockquote>\n<pre>url<span class=\"token operator\">=<\/span><span class=\"token string\"><span class=\"hljs-string\">'http:\/\/fow.kr\/find\/af23c4ee03bd666'<\/span><\/span> \r\nhtml<span class=\"token operator\">=<\/span>read_html<span class=\"token punctuation\">(<\/span>url<span class=\"token punctuation\">,<\/span>encoding<span class=\"token operator\">=<\/span><span class=\"token string\"><span class=\"hljs-string\">'UTF-8'<\/span><\/span><span class=\"token punctuation\">)<\/span> \r\nhtml %&gt;% \r\n  html_nodes(\".tablesorter\") %&gt;% \r\n  html_table(<strong>convert =FALSE<\/strong>) %&gt;% \r\n  lapply(readr::<strong>type_convert<\/strong>)<\/pre>\n<\/blockquote>\n<p><strong>\ud575\uc2ec\uc740 html_table() \ud568\uc218\uc5d0 \ub9e4\uac1c\ubcc0\uc218 convert\ub97c FALSE\ub85c \uc124\uc815\ud558\ub294 \uac83\uc774\ub2e4.<\/strong> \uc0dd\uac01\ubcf4\ub2e4 \uc27d\uace0 \uac04\ub2e8\ud558\ub2e4!<\/p>\n<p>&nbsp;<\/p>\n<p><!--more--><\/p>\n<p>\ucc38\uace0 \uc790\ub8cc<\/p>\n<ul>\n<li>https:\/\/protect0.tistory.com\/7<\/li>\n<li>http:\/\/lumiamitie.github.io\/r_tutorial\/datadesigner_2\/03_web_scraping.html<\/li>\n<\/ul>\n<p>&nbsp;<\/p>\n<p>\ubd80\ubd84\ucd9c\ucc98&gt; R\ub85c \ud558\ub294 \ube45\ub370\uc774\ud130 
\ubd84\uc11d: \ub370\uc774\ud130 \uc804\ucc98\ub9ac\uc640 \uc2dc\uac01\ud654<\/p>\n<p><img loading=\"lazy\" class=\"wp-image-2373 alignleft\" src=\"http:\/\/ds.sumeun.org\/wp-content\/uploads\/2022\/01\/\ud45c\uc9c0_\uc378\ub124\uc77c.png\" alt=\"\" width=\"108\" height=\"154\" \/> <a href=\"https:\/\/www.aladin.co.kr\/shop\/wproduct.aspx?ItemId=286608049\">\u00a0\uc54c\ub77c\ub518<\/a><\/p>\n<p><a href=\"http:\/\/www.yes24.com\/Product\/Goods\/106156560\">\uc608\uc2a424<\/a><\/p>\n<p><a href=\"http:\/\/www.kyobobook.co.kr\/product\/detailViewKor.laf?ejkGb=KOR&amp;mallGb=KOR&amp;barcode=9791196014445&amp;orderClick=LAG&amp;Kc=\">\uad50\ubcf4\ubb38\uace0<\/a><\/p>\n<p>&nbsp;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>library(rvest) rvest \ud328\ud0a4\uc9c0\ub294 \uc6f9\ud06c\ub864\ub9c1\uc5d0 \uc694\uae34\ud558\uac8c \uc4f0\uc778\ub2e4. \uadf8\ub7f0\ub370 \uc774 \ud328\ud0a4\uc9c0\ub85c \ud55c\uae00 \ud45c\ub97c \ud06c\ub864\ub9c1\ud560 \ub54c\uc5d0\ub294 \uc54c\ub824\uc9c4 \ubb38\uc81c\uac00 \uc788\ub2e4. http:\/\/fow.kr\/find\/af23c4ee03bd666 \uc758 \ub370\uc774\ud130\ub97c \ud06c\ub864\ub9c1\ud574\ubcf4\uc790. url=&#8217;http:\/\/fow.kr\/find\/af23c4ee03bd666&#8242; html=read_html(url,encoding=&#8217;UTF-8&#8242;) table=html %&gt;% html_nodes(&#8220;.tablesorter&#8221;) %&gt;% html_table() \uc708\ub3c4\uc6b0\uc5d0\uc11c invalid multibyte string \uc624\ub958\uac00 \ubc1c\uc0dd\ud55c\ub2e4.\u00a0 \uc774\ub97c \ud574\uacb0\ud558\uae30 \uc704\ud574 locale\ub97c \uc7a0\uae50 \ubcc0\uacbd\ud558\uac70\ub098(Sys.setlocale(&#8220;LC_ALL&#8221;, &#8220;English&#8221;)),\u00a0RCurl::getURL()\uc640\u00a0XML::readHTMLTable()\uc744 \uad8c\uc720\ud558\uae30\ub3c4 \ud55c\ub2e4. \ud558\uc9c0\ub9cc RStudio\uc5d0\uc11c\ub294 RStudio\ub97c \uc0ac\uc6a9\ud558\ub294 \ub3c4\uc911\uc5d0 locale\uc744 \ubcc0\uacbd\ud558\uc9c0 \ub9d0\ub77c\uace0 \uad8c\uc720\ud558\uace0 \uc788\ub2e4. 
\uc5ec\uae30\uc11c\ub294 \uc880 \ub354 [&hellip;]<\/p>\n","protected":false},"author":3225,"featured_media":2419,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[96,95,28,483,30,248,195,76],"tags":[22],"jetpack_featured_media_url":"http:\/\/ds.sumeun.org\/wp-content\/uploads\/2022\/01\/chicago-g0fb3094f4_640.jpg","_links":{"self":[{"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts\/2384"}],"collection":[{"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/users\/3225"}],"replies":[{"embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2384"}],"version-history":[{"count":3,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts\/2384\/revisions"}],"predecessor-version":[{"id":2387,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/posts\/2384\/revisions\/2387"}],"wp:featuredmedia":[{"embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=\/wp\/v2\/media\/2419"}],"wp:attachment":[{"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2384"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2384"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/ds.sumeun.org\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2384"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}