欢迎访问悦橙教程(wld5.com),关注java教程。悦橙教程  java问答|  每日更新
页面导航 : > > 文章正文

Java读取Html文本解析email地址的代码(关键词:javaemail)

来源: javaer 分享于  点击 5748 次 点评:206

Java读取Html文本解析email地址的代码(关键词:javaemail)


package com.alpha.test;import java.io.BufferedReader;import java.io.File;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.FileWriter;import java.io.IOException;import java.io.InputStreamReader;import java.io.Reader;import java.io.Writer;import java.net.MalformedURLException;import java.net.URL;import java.net.URLConnection;import java.util.regex.Matcher;import java.util.regex.Pattern;/** * 读取html页面文件解析邮箱地址 *  * @author JavaAlpha 2012-12-19 13:45:11 */public class ReadHtmlToTxt { // 读取文件 public static String readHtml(String path) {  StringBuffer emailCont = new StringBuffer();  File htmlFile = new File(path);  if (htmlFile.exists() && htmlFile.isFile() && htmlFile.canRead()) {   Reader in;   try {    in = new FileReader(htmlFile);    char[] buff = new char[4096];    int nch;    while ((nch = in.read(buff, 0, buff.length)) != -1) {     emailCont.append(checkEmail(new String(buff, 0, nch)));    }   } catch (FileNotFoundException e) {    e.printStackTrace();   } catch (IOException e) {    e.printStackTrace();   }  }  return emailCont.toString(); } // 判断字符串里面是否包括@符号 public static String checkEmail(String str) {  String postCont = "";  // 判断是否回复的内容  if (str.indexOf("@") > -1) {   postCont = str.substring(str.indexOf("@") - 10,     str.indexOf("@") + 10);   if (postCont.indexOf(">") > -1 || postCont.indexOf("<") > -1) {    postCont = postCont.replaceAll(">", "");    postCont = postCont.replaceAll("<", "");    postCont = postCont.replaceAll("/", "");   }   if (postCont.indexOf(",") > -1 || postCont.indexOf(",") > -1     || postCont.indexOf("。") > -1 || postCont.indexOf(";") > -1) {    postCont = postCont.replaceAll(",", "");    postCont = postCont.replaceAll(",", "");    postCont = postCont.replaceAll("。", "");   }   postCont = postCont.substring(0, postCont.indexOf(".com") + 4);   System.out.println(postCont);  }  return postCont; } //过滤汉字 public static boolean checkChinese(String str) {  String regEx = "[\\u4e00-\\u9fa5]";  Pattern p = 
Pattern.compile(regEx);  Matcher m = p.matcher(str);  if (m != null && m.find()){   return true;//是汉字  }  return false; } // 将整理是邮箱地址写入文件 public static void writerFile(String cont, String path) {  File emailFile = new File(path);  try {   //如果文件不存在,创建文件   if (!emailFile.exists()) {    emailFile.createNewFile();   }   Writer out = new FileWriter(emailFile);   out.write(cont);   out.flush();   out.close();  } catch (Exception e) {   e.printStackTrace();  } } /**  * 读取网络内容   */ public static void readUrlCont(String strUrl) {  StringBuffer cont = new StringBuffer();//内容  try {   URL url = new URL(strUrl);   URLConnection conn = url.openConnection();   BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));   String lineCont = "";   while ((lineCont = reader.readLine())!= null) {    cont.append(lineCont+"</br>");   }   reader.close();  } catch (MalformedURLException e) {   e.printStackTrace();  } catch (IOException e) {   e.printStackTrace();  }  System.out.println(cont.toString()); } public static void main(String[] args) {  //String cont = readHtml("e://test.htm");//读取文件  //writerFile(cont, "e://test.txt");//写文件  //checkChinese("qwe123");  readUrlCont("http://www.163.com"); }}
相关栏目:

用户点评