欢迎访问悦橙教程(wld5.com),关注java教程。悦橙教程  java问答|  每日更新
页面导航 : > > 文章正文

JDK自带的HTML解析器 (HTMLEditorKit.Parser) 示例,,HtmlParseDem

来源: javaer 分享于  点击 18107 次 点评:130

JDK自带的HTML解析器 (HTMLEditorKit.Parser) 示例,,HtmlParseDem


HtmlParseDemo.java

import java.io.*;import java.net.*;import javax.swing.text.*;import javax.swing.text.html.*;import javax.swing.text.html.parser.*;/** * This small demo program shows how to use the * HTMLEditorKit.Parser and its implementing class * ParserDelegator in the Swing system. */public class HtmlParseDemo {    public static void main(String [] args) {        Reader r;        if (args.length == 0) {            System.err.println("Usage: java HTMLParseDemo [url | file]");            System.exit(0);        }        String spec = args[0];        try {            if (spec.indexOf("://") > 0) {                URL u = new URL(spec);                Object content = u.getContent();                if (content instanceof InputStream) {                    r = new InputStreamReader((InputStream)content);                }                else if (content instanceof Reader) {                    r = (Reader)content;                }                else {                    throw new Exception("Bad URL content type.");                }            }            else {                r = new FileReader(spec);            }            HTMLEditorKit.Parser parser;            System.out.println("About to parse " + spec);            parser = new ParserDelegator();            parser.parse(r, new HTMLParseLister(), true);            r.close();        }        catch (Exception e) {            System.err.println("Error: " + e);            e.printStackTrace(System.err);        }    }}

HTMLParseLister.java

/** * HTML parsing proceeds by calling a callback for * each and every piece of the HTML do*****ent.  This * simple callback class simply prints an indented * structural listing of the HTML data. */class HTMLParseLister extends HTMLEditorKit.ParserCallback{    int indentSize = 0;    protected void indent() {        indentSize += 3;    }    protected void unIndent() {        indentSize -= 3; if (indentSize < 0) indentSize = 0;    }    protected void pIndent() {        for(int i = 0; i < indentSize; i++) System.out.print(" ");    }    public void handleText(char[] data, int pos) {        pIndent();        System.out.println("Text(" + data.length + " chars)");    }    public void handleComment(char[] data, int pos) {        pIndent();        System.out.println("Comment(" + data.length + " chars)");    }    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {        pIndent();        System.out.println("Tag start(<" + t.toString() + ">, " +                           a.getAttributeCount() + " attrs)");        indent();    }    public void handleEndTag(HTML.Tag t, int pos) {        unIndent();        pIndent();        System.out.println("Tag end(</" + t.toString() + ">)");    }    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {        pIndent();        System.out.println("Tag(<" + t.toString() + ">, " +                           a.getAttributeCount() + " attrs)");    }    public void handleError(String errorMsg, int pos){        System.out.println("Parsing error: " + errorMsg + " at " + pos);    }}    
相关栏目:

用户点评