// Word count: WordCountApp.class

9368 단어 wordcount
/**
 * Hadoop MapReduce word-count driver.
 *
 * <p>Reads text from {@code INPUT_PATH}, splits every line on tab characters in
 * {@link MyMapper}, sums the per-word occurrences in {@link MyReducer}, and writes the
 * (word, count) pairs under {@code OUT_PATH}.
 */
public class WordCountApp {

    /** HDFS location of the input to count. */
    private static final String INPUT_PATH = "hdfs://hadoop1:9000/abd";

    /**
     * HDFS directory that receives the reducer output. It is deleted recursively before the
     * job is submitted (see {@link #runJob()}).
     */
    private static final String OUT_PATH = "hdfs://hadoop1:9000/out";

    /**
     * Program entry point: runs the word-count job and reports failure through the process
     * exit status.
     *
     * <p>On success the method returns normally. If the job reports failure, or any exception
     * is raised while configuring or running it, the stack trace (if any) is printed and the
     * JVM exits with status 1 so that calling scripts can detect the failure.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        boolean succeeded = false;
        try {
            succeeded = runJob();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (!succeeded) {
            System.exit(1);
        }
    }

    /**
     * Configures the job, submits it, and blocks until it finishes.
     *
     * @return the value returned by {@code Job.waitForCompletion(true)}
     * @throws Exception if the output path cannot be cleared or the job cannot be
     *     configured, submitted or awaited
     */
    private static boolean runJob() throws Exception {
        Configuration conf = new Configuration();

        // Remove the output of any previous run before submitting the job.
        FileSystem fileSystem = FileSystem.get(new URI(OUT_PATH), conf);
        fileSystem.delete(new Path(OUT_PATH), true);

        // NOTE(review): newer Hadoop releases offer Job.getInstance(conf, name) in place of
        // this constructor; kept as-is because the target Hadoop version is not visible here.
        Job job = new Job(conf, WordCountApp.class.getSimpleName());
        job.setJarByClass(WordCountApp.class);

        // Map side: input location, mapper class, and the types of the pairs it emits.
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reduce side: reducer class, the types of the final pairs, and the output location.
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));

        // 'true' asks Hadoop to print progress while waiting for the job to finish.
        return job.waitForCompletion(true);
    }

    /**
     * Emits {@code <word, 1>} for every non-empty tab-separated token of an input line.
     *
     * <p>For example, the line {@code "hello\tyou"} produces {@code <hello,1>} and
     * {@code <you,1>}.
     */
    public static class MyMapper extends
            Mapper<LongWritable, Text, Text, LongWritable> {

        /** Constant count written for every occurrence of a word. */
        private static final LongWritable ONE = new LongWritable(1);

        /** Reused output key; avoids allocating a new {@code Text} per token. */
        private final Text word = new Text();

        /**
         * Splits one input line on tabs and writes each non-empty token with a count of 1.
         *
         * @param key input key supplied by the framework; not used
         * @param value the text of the line
         * @param context sink for the emitted {@code <word, 1>} pairs
         * @throws IOException if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            for (String token : line.split("\t")) {
                // "".split("\t") yields one empty string and consecutive tabs yield empty
                // tokens; neither is a word, so they are skipped rather than counted.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, ONE);
            }
        }
    }

    /**
     * Sums the counts received for a word and emits {@code <word, total>}.
     *
     * <p>For example, the grouped input {@code <hello,{1,1}>} produces {@code <hello,2>}.
     */
    public static class MyReducer extends
            Reducer<Text, LongWritable, Text, LongWritable> {

        /** Reused output value; avoids allocating a new {@code LongWritable} per key. */
        private final LongWritable total = new LongWritable();

        /**
         * Adds up all values grouped under {@code key} and writes a single result pair.
         *
         * @param key the word
         * @param values the counts emitted for that word
         * @param context sink for the {@code <word, total>} pair
         * @throws IOException if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values,
                Context context) throws IOException, InterruptedException {
            long sum = 0L;
            for (LongWritable times : values) {
                sum += times.get();
            }
            total.set(sum);
            context.write(key, total);
        }
    }
}

좋은 웹페이지 즐겨찾기