httpclient+jsoup下载小说
分享于 点击 25408 次 点评:187
httpclient+jsoup下载小说
import java.io.BufferedReader;import java.io.BufferedWriter;import java.io.File;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.FileWriter;import java.io.IOException;import java.io.InputStreamReader;import java.io.UnsupportedEncodingException;import java.util.LinkedList;import java.util.List;import javax.swing.JOptionPane;//import java.util.concurrent.TimeUnit;//import org.apache.commons.io.IOUtils;import org.apache.http.HttpEntity;import org.apache.http.HttpResponse;//import org.apache.http.HttpStatus;import org.apache.http.client.ClientProtocolException;import org.apache.http.client.ResponseHandler;import org.apache.http.client.methods.HttpGet;import org.apache.http.client.methods.HttpPost;//import org.apache.http.conn.ManagedClientConnection;import org.apache.http.impl.client.BasicResponseHandler;import org.apache.http.impl.client.DefaultHttpClient;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;public class httpClient { private static File tempFile=null; private static DefaultHttpClient httpClient=new DefaultHttpClient(); private List<String> urlList; private List<String> titleList; private String filePath; public String getFilePath() { return filePath; } public void setFilePath(String filePath) { this.filePath = filePath; } public List<String> getUrlList() { return urlList; } public void setUrlList(List<String> urlList) { this.urlList = urlList; } public List<String> getTitleList() { return titleList; } public void setTitleList(List<String> titleList) { this.titleList = titleList; }// private static HttpResponse response; /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub httpClient hc=new httpClient(); hc.setFilePath("F://小说//"); hc.run(); } public void run(){ JOptionPane.showMessageDialog(null, "开始下载!"); this.setUrlList(new LinkedList<String>()); this.setTitleList(new LinkedList<String>()); //获取章节列表 
getOne("http://www.55885.com/chaojijiyinyouhuaye/",1); parseHtml("div.booklist span a","abs:href"); //获取保存的目录地址指向的文本 int cc=0;//统计本次下载章节数 for(int i=0;i<urlList.size();i++){ if(this.fileExists(getFilePath(),titleList.get(i)+".txt" )){ System.out.println(titleList.get(i)+".txt"+"已经存在"); continue; } getOne(urlList.get(i),1); parseHtml("div.bookcontent",getFilePath(),titleList.get(i)); System.out.println("剩余章节数目:"+(urlList.size()-i-1)); cc++; } JOptionPane.showMessageDialog(null, "下载完成!本次下载:"+cc+"章。"); } //不带登录的httpClient //不登陆获取指定url内容 public void getOne(String url){ HttpGet get=new HttpGet(url); ResponseHandler responseHandler=new BasicResponseHandler(); String txt = null; try { txt = httpClient.execute(get, responseHandler); } catch (ClientProtocolException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { get.abort(); } get.releaseConnection(); System.out.println(txt); } public static void getOne(String url,int k){ HttpGet get=new HttpGet(url); HttpResponse response; try { response = httpClient.execute(get); // 执行,返回状态码// System.out.println(response.getStatusLine()); HttpEntity entity = response.getEntity(); dump(entity); } catch (ClientProtocolException e) { // TODO Auto-generated catch block e.printStackTrace(); JOptionPane.showMessageDialog(null, "连接错误,请稍后重试!!"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); JOptionPane.showMessageDialog(null, "连接错误,请稍后重试!!"); } //关闭连接 get.releaseConnection(); } /** * 打印并保存页面到临时文件temp.temp * @param entity * @throws IOException */ private static void dump(HttpEntity entity) throws IOException { //临时文件 try { if(tempFile==null){ tempFile= File.createTempFile("temp", ".html"); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); JOptionPane.showMessageDialog(null, "创建临时文件错误!!"); } tempFile.deleteOnExit(); String s=null; BufferedReader br = new BufferedReader( new InputStreamReader(entity.getContent(), "GBK")); BufferedWriter bw =new 
BufferedWriter(new FileWriter(tempFile)); while((s=br.readLine()) != null){ bw.write(s); bw.newLine(); bw.flush(); } s=null; bw.close(); br.close();// System.out.println(IOUtils.toString(br)); } public void parseHtml(String aTags,String nameContent){ Document document=null;// if(tempFile.exists()){// System.out.println(tempFile.toString());// }else System.exit(0); try { document = Jsoup.parse(tempFile, "GBK"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); JOptionPane.showMessageDialog(null, "从临时文件,读取,转换错误!!"); } Elements elements=document.select(aTags); for (Element e : elements) {// System.out.println(e.html()); this.getUrlList().add(e.absUrl(nameContent)); String s=e.html().replace("?","").replace("?","").replaceAll("[ *]", ""); this.getTitleList().add(s); } } public void parseHtml(String aTags,String path,String fileName){ Document document=null;// if(tempFile.exists()){// System.out.println(tempFile.toString());// }else System.exit(0); try { document = Jsoup.parse(tempFile, "GBK"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); JOptionPane.showMessageDialog(null, "从临时文件,读取,转换错误!!"); } Elements elements=document.select(aTags); for (Element e : elements) { String temp="<a [^>]*>[^<]*</a>"; String temp1="<div [^>]*>[^<]*</div>"; String s=e.html().replaceAll("<br />", "\\r\\n").replaceAll(" ", "").replaceAll(temp, "").replaceAll(temp1, "");// System.out.println(s); saveContent(path+fileName+".txt",s); } } private void saveContent(String fileName,String txt) { // TODO Auto-generated method stub try { File file=new File(fileName); if(!file.exists()) file.createNewFile(); else { System.out.println(fileName+"已经存在!"); return; } FileOutputStream out=new FileOutputStream(file,false); StringBuffer sb=new StringBuffer(); sb.append(txt);// out.write(sb.toString().getBytes("utf-8"));//这个有乱码不好使 out.write(sb.toString().getBytes()); out.close(); System.out.println(fileName+"完成下载"); } catch 
(FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println(fileName+"保存文件错误!"); JOptionPane.showMessageDialog(null, "保存文件错误!"); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println(fileName+"保存文件错误!"); JOptionPane.showMessageDialog(null, "保存文件错误!"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println(fileName+"保存文件错误!"); JOptionPane.showMessageDialog(null, "保存文件错误!"); } } //判断文件是否存在 public static Boolean fileExists(String filePath,String fileName){ File f=new File(filePath); if(f.exists()){ File file=new File(filePath+fileName); if(file.exists()){ return true; }else{ return false; } }else{ f.mkdir(); return false; } } public void postOneClient(){ if(httpClient!=null) httpClient=new DefaultHttpClient(); HttpPost post=new HttpPost(); post.setHeader("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3"); }}//该片段来自于http://byrx.net
用户点评