欢迎访问悦橙教程(wld5.com),关注java教程。悦橙教程  java问答|  每日更新
页面导航 : > > 文章正文

抓取搜索360软件管家 软件下载地址,360管家,通过抓数据方法 找到360软件下载url

来源: javaer 分享于  点击 35209 次 点评:142

抓取搜索360软件管家 软件下载地址,360管家,通过抓数据方法 找到360软件下载url


通过抓数据方法 找到360安全卫士的软件管家的搜索获取到url下载地址 直接粘贴迅雷即可下载

import java.io.UnsupportedEncodingException;import java.net.URLDecoder;import java.net.URLEncoder;import java.util.HashMap;import java.util.Map;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;import org.openqa.jetty.util.UrlEncoded;import com.steven.downmove.HttpClientUtil;public class Downloads {    /**     * @param args     * @throws UnsupportedEncodingException      */    public static void main(String[] args) throws UnsupportedEncodingException {        String keyword="开源社区";        keyword=URLEncoder.encode(keyword,"gbk");                  //此类为httpclient封装的 就是直接获取网页内容 大家自己动手写个吧 这个就不发大家了        HttpClientUtil hcu = new HttpClientUtil();        String url = "http://baoku.360.cn/soft/search?kw="+keyword;        String count = hcu.getResponseWithHttpClient(url, "gbk");        Document doc = Jsoup.parse(count);        Element ele = doc.select("div[class=main-list fr]").first();        String info_data=ele.select("div[class=info-data]").first().text();        System.out.println(info_data+"\\n");        Elements searchpage_list = ele.select("div[id=searchpage-list] > dl");        int temp=0;        for (Element searchpage : searchpage_list) {            String href = searchpage.select("dd > div[class=h2] > strong >a").attr("href");            //下载地址            String down = "http://q.soft.360.cn/get_download_url.php?type=download_url&soft_ids="+href.substring(href.lastIndexOf("/")+1, href.length());            String downurl=hcu.getResponseWithHttpClient(down, "gbk");            Document dos = Jsoup.parse(downurl);            String urldown = dos.select("ret > softs > soft > durls").first().text();            urldown=urldown.substring(urldown.indexOf("http"), urldown.length());            String title = searchpage.select("dd > div[class=h2] > strong >a").first().text();            String explain = searchpage.select("dd > p").first().text();            String size = searchpage.select("dd > 
p").last().text();            System.out.println(title+"\\n"+urldown+"\\n"+explain+"\\n"+size+"\\n");            temp++;        }        System.out.println("共"+temp+"条");    }}//该片段来自于http://byrx.net
相关栏目:

用户点评