欢迎访问悦橙教程(wld5.com),关注java教程。悦橙教程  java问答|  每日更新
页面导航 : > > 文章正文

HttpClient + Jsoup 下载小说

来源: javaer 分享于  点击 25408 次 点评:187

HttpClient + Jsoup 下载小说


import java.io.BufferedReader;import java.io.BufferedWriter;import java.io.File;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.FileWriter;import java.io.IOException;import java.io.InputStreamReader;import java.io.UnsupportedEncodingException;import java.util.LinkedList;import java.util.List;import javax.swing.JOptionPane;//import java.util.concurrent.TimeUnit;//import org.apache.commons.io.IOUtils;import org.apache.http.HttpEntity;import org.apache.http.HttpResponse;//import org.apache.http.HttpStatus;import org.apache.http.client.ClientProtocolException;import org.apache.http.client.ResponseHandler;import org.apache.http.client.methods.HttpGet;import org.apache.http.client.methods.HttpPost;//import org.apache.http.conn.ManagedClientConnection;import org.apache.http.impl.client.BasicResponseHandler;import org.apache.http.impl.client.DefaultHttpClient;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;public class httpClient {    private static File tempFile=null;    private static DefaultHttpClient httpClient=new DefaultHttpClient();    private List<String> urlList;    private List<String> titleList;    private String filePath;    public String getFilePath() {        return filePath;    }    public void setFilePath(String filePath) {        this.filePath = filePath;    }    public  List<String> getUrlList() {        return urlList;    }    public void setUrlList(List<String> urlList) {        this.urlList = urlList;    }    public List<String> getTitleList() {        return titleList;    }    public void setTitleList(List<String> titleList) {        this.titleList = titleList;    }//  private static HttpResponse response;    /**     * @param args     */    public static void main(String[] args) {        // TODO Auto-generated method stub        httpClient hc=new httpClient();        hc.setFilePath("F://小说//");        hc.run();    }    public void run(){        
JOptionPane.showMessageDialog(null, "开始下载!");        this.setUrlList(new LinkedList<String>());        this.setTitleList(new LinkedList<String>());        //获取章节列表        getOne("http://www.55885.com/chaojijiyinyouhuaye/",1);        parseHtml("div.booklist span a","abs:href");        //获取保存的目录地址指向的文本        int cc=0;//统计本次下载章节数        for(int i=0;i<urlList.size();i++){            if(this.fileExists(getFilePath(),titleList.get(i)+".txt" )){                System.out.println(titleList.get(i)+".txt"+"已经存在");                continue;            }            getOne(urlList.get(i),1);            parseHtml("div.bookcontent",getFilePath(),titleList.get(i));            System.out.println("剩余章节数目:"+(urlList.size()-i-1));            cc++;        }        JOptionPane.showMessageDialog(null, "下载完成!本次下载:"+cc+"章。");    }    //不带登录的httpClient    //不登陆获取指定url内容    public void getOne(String url){        HttpGet get=new HttpGet(url);        ResponseHandler responseHandler=new BasicResponseHandler();        String txt = null;        try {            txt = httpClient.execute(get, responseHandler);        } catch (ClientProtocolException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        } finally {            get.abort();        }        get.releaseConnection();        System.out.println(txt);    }    public static void getOne(String url,int k){        HttpGet get=new HttpGet(url);        HttpResponse response;        try {            response = httpClient.execute(get);            // 执行,返回状态码//          System.out.println(response.getStatusLine());            HttpEntity entity = response.getEntity();            dump(entity);        } catch (ClientProtocolException e) {            // TODO Auto-generated catch block            e.printStackTrace();            JOptionPane.showMessageDialog(null, "连接错误,请稍后重试!!");        } catch (IOException e) {            // TODO Auto-generated catch block            e.printStackTrace();            
JOptionPane.showMessageDialog(null, "连接错误,请稍后重试!!");        }        //关闭连接        get.releaseConnection();    }    /**     * 打印并保存页面到临时文件temp.temp     * @param entity     * @throws IOException     */    private static void dump(HttpEntity entity) throws IOException {        //临时文件        try {            if(tempFile==null){                tempFile= File.createTempFile("temp", ".html");            }        } catch (IOException e) {            // TODO Auto-generated catch block            e.printStackTrace();            JOptionPane.showMessageDialog(null, "创建临时文件错误!!");        }        tempFile.deleteOnExit();        String s=null;        BufferedReader br = new BufferedReader(                new InputStreamReader(entity.getContent(), "GBK"));        BufferedWriter bw =new BufferedWriter(new FileWriter(tempFile));        while((s=br.readLine()) != null){            bw.write(s);            bw.newLine();            bw.flush();        }        s=null;        bw.close();        br.close();//      System.out.println(IOUtils.toString(br));    }    public void parseHtml(String aTags,String nameContent){        Document document=null;//      if(tempFile.exists()){//          System.out.println(tempFile.toString());//      }else System.exit(0);        try {            document = Jsoup.parse(tempFile, "GBK");        } catch (IOException e) {            // TODO Auto-generated catch block            e.printStackTrace();            JOptionPane.showMessageDialog(null, "从临时文件,读取,转换错误!!");        }        Elements elements=document.select(aTags);        for (Element e : elements) {//          System.out.println(e.html());            this.getUrlList().add(e.absUrl(nameContent));            String s=e.html().replace("?","").replace("?","").replaceAll("[ *]", "");            this.getTitleList().add(s);        }    }    public void parseHtml(String aTags,String path,String fileName){        Document document=null;//      if(tempFile.exists()){//          
System.out.println(tempFile.toString());//      }else System.exit(0);        try {            document = Jsoup.parse(tempFile, "GBK");        } catch (IOException e) {            // TODO Auto-generated catch block            e.printStackTrace();            JOptionPane.showMessageDialog(null, "从临时文件,读取,转换错误!!");        }        Elements elements=document.select(aTags);        for (Element e : elements) {            String temp="<a [^>]*>[^<]*</a>";            String temp1="<div [^>]*>[^<]*</div>";            String s=e.html().replaceAll("<br />", "\\r\\n").replaceAll(" ", "").replaceAll(temp, "").replaceAll(temp1, "");//          System.out.println(s);            saveContent(path+fileName+".txt",s);        }    }    private void saveContent(String fileName,String txt) {        // TODO Auto-generated method stub        try {            File file=new File(fileName);            if(!file.exists())                file.createNewFile();            else {                System.out.println(fileName+"已经存在!");                return;            }            FileOutputStream out=new FileOutputStream(file,false);                       StringBuffer sb=new StringBuffer();                sb.append(txt);//              out.write(sb.toString().getBytes("utf-8"));//这个有乱码不好使                out.write(sb.toString().getBytes());            out.close();            System.out.println(fileName+"完成下载");        } catch (FileNotFoundException e) {            // TODO Auto-generated catch block            e.printStackTrace();            System.out.println(fileName+"保存文件错误!");            JOptionPane.showMessageDialog(null, "保存文件错误!");        } catch (UnsupportedEncodingException e) {            // TODO Auto-generated catch block            e.printStackTrace();            System.out.println(fileName+"保存文件错误!");            JOptionPane.showMessageDialog(null, "保存文件错误!");        } catch (IOException e) {            // TODO Auto-generated catch block            e.printStackTrace();            
System.out.println(fileName+"保存文件错误!");            JOptionPane.showMessageDialog(null, "保存文件错误!");        }    }    //判断文件是否存在    public static Boolean fileExists(String filePath,String fileName){        File f=new File(filePath);        if(f.exists()){            File file=new File(filePath+fileName);            if(file.exists()){                return true;            }else{                return false;            }        }else{        f.mkdir();        return false;        }    }    public void postOneClient(){        if(httpClient!=null)        httpClient=new DefaultHttpClient();        HttpPost post=new HttpPost();        post.setHeader("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3");    }}//该片段来自于http://byrx.net
相关栏目:

用户点评