新浪RSS解析,比较老的一个话题啦,新浪rss解析,代码结构不复杂Main.
分享于 点击 3092 次 点评:180
新浪RSS解析,比较老的一个话题啦,新浪rss解析,代码结构不复杂Main.
代码结构不复杂:Main.java 是主类;News.java 和 Outline.java 分别表示一条新闻对象和一个新闻频道对象;PublicUtils.java 是工具类,很简单,只有一个读取配置文件中新浪RSS地址的方法;Dom4jXmlParser.java 是核心类,它根据工具类读取到的地址解析XML文档,获取各个频道的集合,然后再迭代解析各个频道内的XML,获取新闻的集合,并用 News(新闻)和 Outline(频道)封装。
[Java]代码
文件:resource_sina.properties;内容:url=http://rss.sina.com.cn/sina_all_opml.xml;作用:存放新浪RSS接口地址的配置文件。
[Java]代码
package cn.outofmemory.code;

import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import cn.tyz.domain.News;
import cn.tyz.domain.Outline;
import cn.tyz.utils.PublicUtils;
import cn.tyz.xml.Dom4jXmlParser;

/**
 * Command-line entry point: loads the channel outline from the configured URL,
 * then prints every channel, every sub-channel and the news items of each
 * sub-channel feed to standard output.
 */
public class Main {

    /**
     * Fetches the channel outline and prints all channels with their news.
     *
     * <p>A failure while loading the outline itself is reported with a stack
     * trace and ends the run. A failure while loading one sub-channel feed is
     * reported and the remaining feeds are still processed.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            // Channel name -> sub-channels of that channel.
            Map<String, List<Outline>> channels =
                    Dom4jXmlParser.getLinkAsXmlData(new URL(PublicUtils.getUrl()));

            for (Entry<String, List<Outline>> channel : channels.entrySet()) {
                System.out.println("模块名称:" + channel.getKey());
                for (Outline outline : channel.getValue()) {
                    printSubChannel(outline);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the header of one sub-channel followed by all news items of its feed.
     * Errors are contained here so that one broken feed does not abort the others.
     */
    private static void printSubChannel(Outline outline) {
        System.out.println(outline.getText() + ":" + outline.getXmlUrl() + "内容:\n");
        try {
            List<News> newsItems = Dom4jXmlParser.getNewsAsXmlData(new URL(outline.getXmlUrl()));
            for (News news : newsItems) {
                System.out.println(text(news.getTitle()));
                System.out.println(text(news.getDescription()));
                System.out.println(text(news.getPubDate()));
                System.out.println(text(news.getLink()));
                System.out.println(text(news.getCategory()));
                System.out.println(text(news.getComments()));
                System.out.println("\n");
            }
        } catch (Exception e) {
            // Covers a missing/malformed feed URL as well as download and parse errors.
            System.err.println("Failed to read feed: " + outline.getXmlUrl());
            e.printStackTrace();
        }
    }

    /**
     * Returns the trimmed value, or an empty string when the value is null.
     * News fields are plain strings that may never have been set, so calling
     * {@code trim()} on them directly can throw a NullPointerException.
     */
    private static String text(String value) {
        return value == null ? "" : value.trim();
    }
}
[Java]代码
package cn.tyz.utils;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * Small helper for reading settings from the {@code resource_sina.properties}
 * classpath resource.
 */
public class PublicUtils {

    /** Name of the classpath resource that holds the configuration. */
    private static final String RESOURCE_NAME = "resource_sina.properties";

    /**
     * Reads the {@code url} property from the configuration resource.
     *
     * @return the value of the {@code url} property, or {@code null} when the
     *         property is absent or the resource could not be read
     * @throws IllegalStateException if the configuration resource is not on the classpath
     */
    public static String getUrl() {
        InputStream in = PublicUtils.class.getClassLoader().getResourceAsStream(RESOURCE_NAME);
        if (in == null) {
            // getResourceAsStream signals "not found" with null; fail with a clear
            // message instead of a NullPointerException from Properties.load.
            throw new IllegalStateException(RESOURCE_NAME + " not found on the classpath");
        }

        Properties properties = new Properties();
        try {
            properties.load(in);
        } catch (IOException e) {
            // Best effort, as before: report the problem and fall through with
            // whatever was loaded (the lookup below then yields null).
            e.printStackTrace();
        } finally {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return properties.getProperty("url");
    }
}
[Java]代码
package cn.tyz.domain;public class News{ private String title; private String link; private String author; private String guid; private String category; private String pubDate; private String comments; private String description; public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getLink() { return link; } public void setLink(String link) { this.link = link; } public String getAuthor() { return author; } public void setAuthor(String author) { this.author = author; } public String getGuid() { return guid; } public void setGuid(String guid) { this.guid = guid; } public String getCategory() { return category; } public void setCategory(String category) { this.category = category; } public String getPubDate() { return pubDate; } public void setPubDate(String pubDate) { this.pubDate = pubDate; } public String getComments() { return comments; } public void setComments(String comments) { this.comments = comments; } public String getDescription() { return description; } public void setDescription(String description) { this.description = description; }}
[Java]代码
package cn.tyz.domain;public class Outline{ private String title; private String text; private String type; private String xmlUrl; private String htmlUrl; public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getText() { return text; } public void setText(String text) { this.text = text; } public String getType() { return type; } public void setType(String type) { this.type = type; } public String getXmlUrl() { return xmlUrl; } public void setXmlUrl(String xmlUrl) { this.xmlUrl = xmlUrl; } public String getHtmlUrl() { return htmlUrl; } public void setHtmlUrl(String htmlUrl) { this.htmlUrl = htmlUrl; }}
[Java]代码
package cn.tyz.xml;

import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.xml.sax.SAXException;

import cn.tyz.domain.News;
import cn.tyz.domain.Outline;

/**
 * dom4j-based parser for the channel outline document and for the news feed
 * of a single sub-channel.
 */
public class Dom4jXmlParser {

    /**
     * Parses the outline document at the given URL.
     *
     * <p>Each first-level {@code outline} element under {@code body} is a channel
     * whose name is its {@code text} attribute; each {@code outline} element nested
     * directly inside it becomes one {@link Outline}.
     *
     * @param url location of the outline XML document
     * @return channel name mapped to the sub-channels of that channel, in document
     *         order; empty when the document has no {@code body} element. If two
     *         channels share a name, the later one replaces the earlier one.
     * @throws Exception if the document cannot be fetched or parsed
     */
    public static Map<String, List<Outline>> getLinkAsXmlData(URL url) throws Exception {
        // LinkedHashMap keeps the channels in the order they appear in the document.
        Map<String, List<Outline>> channels = new LinkedHashMap<String, List<Outline>>();

        Document document = newReader().read(url);
        Element body = document.getRootElement().element("body");
        if (body == null) {
            // No <body>: nothing to list, and dereferencing it would throw.
            return channels;
        }

        List<Element> channelNodes = body.elements("outline");
        for (Element channelNode : channelNodes) {
            String channelName = channelNode.attributeValue("text");

            List<Element> itemNodes = channelNode.elements("outline");
            List<Outline> items = new ArrayList<Outline>();
            for (Element itemNode : itemNodes) {
                Outline outline = new Outline();
                outline.setTitle(itemNode.attributeValue("title"));
                outline.setText(itemNode.attributeValue("text"));
                outline.setType(itemNode.attributeValue("type"));
                outline.setXmlUrl(itemNode.attributeValue("xmlUrl"));
                outline.setHtmlUrl(itemNode.attributeValue("htmlUrl"));
                items.add(outline);
            }
            channels.put(channelName, items);
        }
        return channels;
    }

    /**
     * Parses the news feed of a single sub-channel.
     *
     * <p>Every {@code item} element under {@code channel} becomes one {@link News};
     * each field is filled from the text of the child element with the same name.
     *
     * @param url location of the feed XML document
     * @return the news items in document order; empty when the document has no
     *         {@code channel} element
     * @throws Exception if the document cannot be fetched or parsed
     */
    public static List<News> getNewsAsXmlData(URL url) throws Exception {
        List<News> newsItems = new ArrayList<News>();

        Document document = newReader().read(url);
        Element channel = document.getRootElement().element("channel");
        if (channel == null) {
            // No <channel>: nothing to list, and dereferencing it would throw.
            return newsItems;
        }

        List<Element> itemNodes = channel.elements("item");
        for (Element itemNode : itemNodes) {
            News news = new News();
            news.setTitle(itemNode.elementText("title"));
            news.setLink(itemNode.elementText("link"));
            news.setAuthor(itemNode.elementText("author"));
            news.setGuid(itemNode.elementText("guid"));
            news.setCategory(itemNode.elementText("category"));
            news.setPubDate(itemNode.elementText("pubDate"));
            news.setComments(itemNode.elementText("comments"));
            news.setDescription(itemNode.elementText("description"));
            newsItems.add(news);
        }
        return newsItems;
    }

    /**
     * Creates a reader that does not resolve external entities or load external
     * DTDs. The documents come from remote servers, so these features are switched
     * off to avoid XML external entity (XXE) attacks.
     */
    private static SAXReader newReader() throws SAXException {
        SAXReader reader = new SAXReader();
        reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
        reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        // NOTE(review): Xerces-specific feature; supported by the JDK's built-in
        // parser - confirm if another SAX implementation is on the classpath.
        reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        return reader;
    }
}
用户点评