使用HtmlParser解析html,htmlparser解析html,使用HtmlParser
分享于 点击 11743 次 点评:103
使用HtmlParser解析html,htmlparser解析html,使用HtmlParser
使用HtmlParser2.0单文件版实现HTML表格解析
使用HtmlParser2.0单文件版实现HTML表格解析
package org.htmlparser;public class MyTest { public static void main(String[] args) { Parser parser = null; NodeList tableList = null; NodeFilter tableFilter = null; try { String html = "<body><table id=’table1′ >" + "<tr><td>1-11</td><td>1-12</td><td>1-13</td>" + "<tr><td>1-21</td><td>1-22</td><td>1-23</td>" + "<tr><td>1-31</td><td>1-32</td><td>1-33</td></table>" + "<table id=’table2′ >" + "<tr><td>2-11</td><td>2-12</td><td>2-13</td" + "<tr><td>2-21</td><td>2-22</td><td>2-23</td>" + "<tr><td>2-31</td><td>2-32</td><td>2-33</td></table>" + "</body>"; //parser = Parser.createParser(html, "GBK"); parser = new Parser("http://www.hao123.com"); tableFilter = new NodeClassFilter(TableTag.class); //tableFilter = new TagNameFilter("TABLE"); tableList = parser.extractAllNodesThatMatch(tableFilter); for (int i=0; i<tableList.size(); i++) { TableTag table = (TableTag) tableList.elementAt(i); //取得表中的行集 TableRow[] rows = table.getRows(); //遍历每行 for (int r=0; r<rows.length; r++) { TableRow tr = rows[r]; TableColumn[] td = tr.getColumns(); //行中的列 for (int c=0; c<td.length; c++) { System.out.print(td[c].toPlainTextString() + " "); } System.out.println(); } } } catch (ParserException e) { e.printStackTrace(); } }}
用户点评