欢迎访问悦橙教程(wld5.com),关注java教程。悦橙教程  java问答|  每日更新
页面导航 : > > 文章正文

jsoup 提取 html 中的所有链接、图片和媒体

来源: javaer 分享于  点击 21630 次 点评:156

jsoup 提取 html 中的所有链接、图片和媒体


package org.jsoup.examples;

import org.jsoup.Jsoup;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
 * Example program to list links from a URL.
 *
 * <p>Fetches the page whose URL is given as the single command-line argument and prints
 * three sections: every element carrying a {@code src} attribute ("Media"), every
 * {@code <link href>} element ("Imports"), and every {@code <a href>} element ("Links").
 */
public class ListLinks {

    /**
     * Fetches the given URL and prints its media, imports and links to standard output.
     *
     * @param args exactly one element: the URL to fetch; any other count is rejected by
     *             {@code Validate.isTrue} with a usage message
     * @throws IOException if fetching the document fails
     */
    public static void main(String[] args) throws IOException {
        Validate.isTrue(args.length == 1, "usage: supply url to fetch");
        String url = args[0];
        print("Fetching %s...", url);

        Document doc = Jsoup.connect(url).get();
        Elements links = doc.select("a[href]");
        Elements media = doc.select("[src]");
        Elements imports = doc.select("link[href]");

        // "\n" (a real newline) separates the sections; the original "\\n" printed a
        // literal backslash followed by 'n'.
        print("\nMedia: (%d)", media.size());
        for (Element src : media) {
            // The "abs:" attribute prefix asks jsoup for the URL resolved against the
            // document's base URI.
            if (src.tagName().equals("img")) {
                // Images additionally report their declared dimensions and a shortened alt text.
                print(" * %s: <%s> %sx%s (%s)",
                        src.tagName(), src.attr("abs:src"), src.attr("width"), src.attr("height"),
                        trim(src.attr("alt"), 20));
            } else {
                print(" * %s: <%s>", src.tagName(), src.attr("abs:src"));
            }
        }

        print("\nImports: (%d)", imports.size());
        for (Element link : imports) {
            print(" * %s <%s> (%s)", link.tagName(), link.attr("abs:href"), link.attr("rel"));
        }

        print("\nLinks: (%d)", links.size());
        for (Element link : links) {
            print(" * a: <%s>  (%s)", link.attr("abs:href"), trim(link.text(), 35));
        }
    }

    /**
     * Formats {@code msg} with {@code args} via {@link String#format(String, Object...)} and
     * writes the result as one line to standard output.
     *
     * @param msg  the format string
     * @param args the format arguments
     */
    private static void print(String msg, Object... args) {
        System.out.println(String.format(msg, args));
    }

    /**
     * Shortens a string for display.
     *
     * @param s     the text to shorten
     * @param width the maximum length of the result
     * @return {@code s} unchanged when it is no longer than {@code width}; otherwise its
     *         first {@code width - 1} characters followed by {@code "."}
     */
    private static String trim(String s, int width) {
        if (s.length() > width) {
            return s.substring(0, width - 1) + ".";
        } else {
            return s;
        }
    }
}
相关栏目:

用户点评