抓取搜索360软件管家 软件下载地址,360管家,通过抓数据方法 找到36
分享于 点击 35209 次 点评:142
抓取搜索360软件管家 软件下载地址,360管家,通过抓数据方法 找到36
通过抓取数据的方法,调用360安全卫士软件管家的搜索页面,获取软件的下载地址(URL),直接粘贴到迅雷即可下载。
import java.io.UnsupportedEncodingException;import java.net.URLDecoder;import java.net.URLEncoder;import java.util.HashMap;import java.util.Map;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;import org.openqa.jetty.util.UrlEncoded;import com.steven.downmove.HttpClientUtil;public class Downloads { /** * @param args * @throws UnsupportedEncodingException */ public static void main(String[] args) throws UnsupportedEncodingException { String keyword="开源社区"; keyword=URLEncoder.encode(keyword,"gbk"); //此类为httpclient封装的 就是直接获取网页内容 大家自己动手写个吧 这个就不发大家了 HttpClientUtil hcu = new HttpClientUtil(); String url = "http://baoku.360.cn/soft/search?kw="+keyword; String count = hcu.getResponseWithHttpClient(url, "gbk"); Document doc = Jsoup.parse(count); Element ele = doc.select("div[class=main-list fr]").first(); String info_data=ele.select("div[class=info-data]").first().text(); System.out.println(info_data+"\\n"); Elements searchpage_list = ele.select("div[id=searchpage-list] > dl"); int temp=0; for (Element searchpage : searchpage_list) { String href = searchpage.select("dd > div[class=h2] > strong >a").attr("href"); //下载地址 String down = "http://q.soft.360.cn/get_download_url.php?type=download_url&soft_ids="+href.substring(href.lastIndexOf("/")+1, href.length()); String downurl=hcu.getResponseWithHttpClient(down, "gbk"); Document dos = Jsoup.parse(downurl); String urldown = dos.select("ret > softs > soft > durls").first().text(); urldown=urldown.substring(urldown.indexOf("http"), urldown.length()); String title = searchpage.select("dd > div[class=h2] > strong >a").first().text(); String explain = searchpage.select("dd > p").first().text(); String size = searchpage.select("dd > p").last().text(); System.out.println(title+"\\n"+urldown+"\\n"+explain+"\\n"+size+"\\n"); temp++; } System.out.println("共"+temp+"条"); }}//该片段来自于http://byrx.net
用户点评