Java 将 HTML 转换为纯文本(基于 javax.swing.text.html 解析器的示例)
分享于 点击 37345 次 点评:140
Java 将 HTML 转换为纯文本:下面的示例使用 JDK 自带的 Swing HTML 解析器提取页面中的文本内容。
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Extracts the plain text of an HTML document using the Swing HTML parser.
 *
 * <p>Usage: call {@link #parse(Reader)} with the HTML source, then
 * {@link #getText()} to obtain the concatenated text chunks. Each call to
 * {@code parse} discards the text collected by the previous call.
 */
public class Html2Text extends HTMLEditorKit.ParserCallback {

    /** File read by {@link #main} when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "java-new.html";

    /** Text collected so far; created eagerly so getText() is safe before parse(). */
    private final StringBuilder text = new StringBuilder();

    public Html2Text() {}

    /**
     * Parses the HTML supplied by {@code in} and collects its text content.
     *
     * <p>The reader is not closed by this method; the caller owns it.
     *
     * @param in source of the HTML markup
     * @throws IOException if reading from {@code in} fails
     */
    public void parse(Reader in) throws IOException {
        // Start from a clean buffer so repeated parses do not accumulate.
        text.setLength(0);
        ParserDelegator delegator = new ParserDelegator();
        // The third parameter is TRUE to ignore the charset directive.
        delegator.parse(in, this, Boolean.TRUE);
    }

    /**
     * Parser callback: appends one chunk of document text to the buffer.
     *
     * @param data the characters of the text chunk
     * @param pos  position reported by the parser (unused)
     */
    @Override
    public void handleText(char[] data, int pos) {
        text.append(data);
    }

    /**
     * Returns the text collected by the most recent {@link #parse(Reader)} call.
     *
     * @return the collected text, or an empty string if nothing has been parsed
     */
    public String getText() {
        return text.toString();
    }

    /**
     * Converts an HTML file to plain text and prints it to standard output.
     *
     * @param args optional: {@code args[0]} is the HTML file to convert;
     *             defaults to {@code java-new.html}
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : DEFAULT_INPUT;
        // try-with-resources closes the file even when parsing fails.
        // The file is decoded as UTF-8 rather than the platform default charset.
        try (Reader in = new InputStreamReader(
                new FileInputStream(path), StandardCharsets.UTF_8)) {
            Html2Text parser = new Html2Text();
            parser.parse(in);
            System.out.println(parser.getText());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
用户点评