抓取国家统计局网站上的最新县及县以上行政区划代码(Apache Camel 版)
分享于 点击 2486 次 点评:240
抓取国家统计局网站上的最新县及县以上行政区划代码(Apache Camel 版)
// NOTE(review): this is a fragment. The two Pattern fields belong at class level and the
// from(...) statement presumably lives inside a Camel RouteBuilder#configure() — confirm
// against the enclosing class, which is not visible here.

/** Matches a single-quoted relative link of the form {@code 't<8 digits>_<8+ digits>.htm'} on the index page. */
private static final Pattern pattern =
        Pattern.compile("'(t\\d{8}_\\d{8,}\\.htm)'", Pattern.CASE_INSENSITIVE);

/** Captures the inner HTML of the first {@code <span class="content">} element on the data page. */
private static final Pattern pattern1 =
        Pattern.compile("<span class=\"content\">(.*?)</span>", Pattern.CASE_INSENSITIVE);

// Fire immediately, then once every 24 hours (the timer period is given in milliseconds).
from("timer://foo?fixedRate=true&delay=0&period=86400000")
        // Fetch the index page that lists the published code tables.
        .to("http4://www.stats.gov.cn/tjbz/xzqhdm/")
        // Extract the first matching page link from the index and store its absolute URL in a header.
        .process(new Processor() {
            @Override
            public void process(Exchange exchange) throws Exception {
                String indexHtml = exchange.getIn().getBody(String.class);
                Matcher m = pattern.matcher(indexHtml == null ? "" : indexHtml);
                if (!m.find()) {
                    // Without a link the next HTTP call would go to the placeholder host below,
                    // so fail here with a message that names the real cause.
                    throw new IllegalStateException(
                            "No data page link found on http://www.stats.gov.cn/tjbz/xzqhdm/");
                }
                exchange.getIn().setHeader("newurl", "http4://www.stats.gov.cn/tjbz/xzqhdm/" + m.group(1));
            }
        })
        // Exchange.HTTP_URI overrides the endpoint URI of the following http4 call.
        .setHeader(Exchange.HTTP_URI, header("newurl"))
        // Set the exchange charset to GBK before the data page is fetched and read as a String.
        .setProperty(Exchange.CHARSET_NAME, constant("GBK"))
        // Placeholder endpoint; the real target comes from the HTTP_URI header set above.
        .to("http4://will.be.override")
        // Reduce the page to the content span and emit one comma-separated record per line.
        .process(new Processor() {
            @Override
            public void process(Exchange exchange) throws Exception {
                String pageHtml = exchange.getIn().getBody(String.class);
                Matcher m = pattern1.matcher(pageHtml == null ? "" : pageHtml);
                if (m.find()) {
                    // Collapse each run of spaces/whitespace into a single comma separator.
                    // NOTE(review): the literal inside (( )|...) may originally have been an
                    // HTML non-breaking-space entity lost in extraction — confirm.
                    String content = m.group(1).replaceAll("(( )|\\s)+", ",");
                    StringBuilder sb = new StringBuilder();
                    // Each <BR>-separated chunk becomes its own line.
                    for (String row : content.split("<BR>")) {
                        sb.append(row).append("\n");
                    }
                    exchange.getIn().setBody(sb.toString());
                }
                // No content span found: the body is left unchanged and written out as-is.
            }
        })
        // Write the result to target/m3958/m3958.html.
        .setHeader(Exchange.FILE_NAME, constant("m3958.html"))
        .to("file:target/m3958");
// This snippet comes from http://byrx.net
用户点评