
A simple Hadoop test example: counting occurrences of a specific word



I found a test example online that counts how many times a specified word appears in a text file. While trying it out I ran into a few bugs, so I am sharing the corrected version here. VM arguments for running it under Eclipse: -Xms64m -Xmx512m. Program arguments: the input path, the output path, and the word to count.

SingleWordCount.java

package com.run.ayena.distributed.test;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

// Counts the occurrences of one specified word in a text file.
public class SingleWordCount {

    public static class SingleWordCountMapper extends
            Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text val = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            String keyword = context.getConfiguration().get("word");
            while (itr.hasMoreTokens()) {
                String nextkey = itr.nextToken();
                if (nextkey.trim().equals(keyword)) {
                    val.set(nextkey);
                    context.write(val, one);
                } else {
                    // do nothing
                }
            }
        }
    }

    public static class SingleWordCountReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        if (otherArgs.length != 3) {
            System.err.println("Usage: wordcount <in> <out> <word>");
            System.exit(2);
        }
        // The word to count, passed in as the third argument
        conf.set("word", otherArgs[2]);
        // Specify the system directory
        conf.set("mapred.system.dir", "/cygdrive/e/workspace_hadoop/SingleWordCount/");
        // Set the job name
        Job job = new Job(conf, "word count");
        // Set the class that carries the job's jar
        job.setJarByClass(SingleWordCount.class);
        // Set the Mapper
        job.setMapperClass(SingleWordCountMapper.class);
        // Set the local combiner; in this example it is the same class as the Reducer
        job.setCombinerClass(SingleWordCountReducer.class);
        // Set the Reducer
        job.setReducerClass(SingleWordCountReducer.class);
        // Set the map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Set the reducer output key type
        job.setOutputKeyClass(Text.class);
        // Set the reducer output value type
        job.setOutputValueClass(IntWritable.class);
        // Set the input and output directories
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        // Run the job and exit once it finishes
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
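Note that the mapper only emits a token when it exactly equals the configured word, so a token with attached punctuation (for example "hadoop,") is not counted. Below is a minimal, Hadoop-free sketch of that map-side matching logic so it can be tried in isolation; the class name, method name, and sample sentence are made up for illustration and are not part of the original example.

import java.util.StringTokenizer;

// A minimal sketch of the map-side matching logic, independent of Hadoop.
public class MatchLogicDemo {

    // Tokenize a line on whitespace and count exact matches of the target word,
    // mirroring what SingleWordCountMapper does for each input line.
    static int countMatches(String line, String keyword) {
        int count = 0;
        StringTokenizer itr = new StringTokenizer(line);
        while (itr.hasMoreTokens()) {
            if (itr.nextToken().trim().equals(keyword)) {
                count++;
            }
        }
        return count;
    }

    public static void main(String[] args) {
        // "hadoop" appears twice as a standalone token; "hadoop," with a comma would not match.
        System.out.println(countMatches("hadoop is simple and hadoop scales", "hadoop")); // prints 2
    }
}

To run the real job, submit it with the three program arguments checked in main(): the input directory, the output directory, and the word to count. Reusing the reducer as the combiner is safe here because summing partial counts is associative and commutative, so pre-aggregating on the map side does not change the final totals.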