使用HtmlCleaner从指定网页中提取链接的例子,htmlcleaner例子,需要依赖 htmlcleaner
分享于 点击 1150 次 点评:273
使用HtmlCleaner从指定网页中提取链接的例子,htmlcleaner例子,需要依赖 htmlcleaner
需要依赖 htmlcleaner-2.2.jar 或以上版本
package crawl;

import java.net.URL;

import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;

/**
 * Small HtmlCleaner demo: loads a web page, selects nodes with an XPath
 * expression, and prints each node's text together with its {@code href}
 * attribute.
 *
 * @author Tobacco
 */
public class Crawl {

    /**
     * Cleans the page at {@code http://www.baidu.com} (passing {@code "gbk"}
     * as the charset argument), evaluates {@code //p[@id='nv']/a} against the
     * resulting tree, and writes two lines to standard output per match: the
     * node text, then the value of its {@code href} attribute.
     *
     * <p>Any exception raised while doing so is caught here and its stack
     * trace is printed; nothing is rethrown from the try block.
     *
     * @param args command-line arguments; not read
     * @throws Exception declared by the signature
     */
    public static void main(String[] args) throws Exception {
        try {
            HtmlCleaner htmlCleaner = new HtmlCleaner();
            TagNode root = htmlCleaner.clean(new URL("http://www.baidu.com"), "gbk");

            Object[] matches = root.evaluateXPath("//p[@id='nv']/a");
            for (int i = 0; i < matches.length; i++) {
                // Each XPath result is treated as an element node.
                TagNode link = (TagNode) matches[i];
                System.out.println(link.getText());
                System.out.println(link.getAttributeByName("href"));
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
}
// This snippet comes from http://byrx.net
用户点评