Storm wordcount: reading from a file

Source code:
package stormdemo;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class WordCountTopology {
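  // Spout: reads the words file line by line and emits one tuple per line.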
  public static class WordReader extends BaseRichSpout {
        private static final long serialVersionUID = 1L;
        private SpoutOutputCollector collector;
        private FileReader fileReader;
        private boolean completed = false;
        public void ack(Object msgId) {
            System.out.println("OK:"+msgId);
        }
        public void close() {}
        public void fail(Object msgId) {
            System.out.println("FAIL:"+msgId);
        }
        /** The only thing this method does is emit one tuple per file line. */
        public void nextTuple() {
            /**
             * nextTuple() is called in a loop forever, so once the file has
             * been read we just wait a bit and return.
             */
            if(completed){
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    //Do nothing
                }
                return;
            }
            String str;
            //Open the reader
            BufferedReader reader = new BufferedReader(fileReader);
            try{
                //Read all lines
                while((str = reader.readLine()) != null){
                    /**
                     * Emit one tuple per line, using the line itself as the
                     * message ID so ack()/fail() can identify it.
                     */
                    this.collector.emit(new Values(str),str);
                }
            }catch(Exception e){
                throw new RuntimeException("Error reading tuple",e);
            }finally{
                completed = true;
            }
        }

        /**
         * Open the words file named by the "wordsFile" config entry and keep the collector.
         */
        public void open(@SuppressWarnings("rawtypes") Map conf, TopologyContext context,
                SpoutOutputCollector collector) {
            try {
                this.fileReader = new FileReader(conf.get("wordsFile").toString());
            } catch (FileNotFoundException e) {
                throw new RuntimeException("Error reading file ["+conf.get("wordsFile")+"]");
            }
            this.collector = collector;
        }

        /**
         * Declare the output field "line"
         */
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("line"));
        }
    }

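  // Bolt: splits each line on spaces, trims and lowercases each word, and emits it.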
  public static class WordNormalizer extends BaseBasicBolt {

  private static final long serialVersionUID = 3L;

        public void cleanup() {}
        public void execute(Tuple input, BasicOutputCollector collector) {
            String sentence = input.getString(0);
            String[] words = sentence.split(" ");
            for(String word : words){
                word = word.trim();
                if(!word.isEmpty()){
                    word = word.toLowerCase();
                    collector.emit(new Values(word));
                }
            }
        }

        /**
         * The bolt will only emit the field "word"
         */
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word"));
        }
    }
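  // Bolt: keeps a running count per word and dumps the counts to a file.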
  public static class WordCount extends BaseBasicBolt {
    private static final long serialVersionUID = 2L;
    Map<String, Integer> counts = new HashMap<String, Integer>();
    public void execute(Tuple tuple, BasicOutputCollector collector) {
      String word = tuple.getString(0);
      Integer count = counts.get(word);
      if (count == null)
        count = 0;
      count++;
      counts.put(word, count);
      //collector.emit(new Values(word, count));
      // Rewrite the output file with the current counts on every tuple;
      // try-with-resources closes the writer even when a write fails.
      try (BufferedWriter output = new BufferedWriter(
          new FileWriter("/home/hadoop/wordcounts.txt", false))) {
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
          output.write(entry.getKey() + ": " + entry.getValue());
          output.newLine();
        }
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
     public void declareOutputFields(OutputFieldsDeclarer declarer) {
      declarer.declare(new Fields("word", "count"));
    }
  }

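  // Wire the topology: spout -> "split" bolt (shuffle grouping) -> "count" bolt
  // (global grouping, so a single task sees every word).
  // Usage: <wordsFile> [topologyName]; with a topology name it submits to the
  // cluster, otherwise it runs in a local in-process cluster.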
  public static void main(String[] args) throws Exception {

    TopologyBuilder builder = new TopologyBuilder();

    builder.setSpout("spout", new WordReader());
    builder.setBolt("split", new WordNormalizer()).shuffleGrouping("spout");
    builder.setBolt("count", new WordCount()).globalGrouping("split");

    Config conf = new Config();
    conf.put("wordsFile", args[0]);
    conf.setDebug(false);
    // Run the topology: submit to the cluster when a topology name (args[1]) is given, otherwise run locally.
     if (args != null && args.length > 1) {
        conf.setNumWorkers(2);
        StormSubmitter.submitTopology(args[1], conf, builder.createTopology());
    }   
    else {
        conf.put(Config.TOPOLOGY_MAX_SPOUT_PENDING, 1);
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("wordcount", conf, builder.createTopology());
        Thread.sleep(1000);
        cluster.shutdown();
    }

  }
}
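A note on the WordCount bolt above: it reopens and rewrites /home/hadoop/wordcounts.txt for every incoming tuple, which is easy to follow but heavy on file I/O. A minimal alternative sketch (the class name WordCountOnCleanup is my own, not part of the example) counts in memory and writes the file once in cleanup(); since Storm only reliably calls cleanup() in local mode, this variant suits local testing rather than a cluster run:

package stormdemo;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class WordCountOnCleanup extends BaseBasicBolt {
  private static final long serialVersionUID = 4L;
  private final Map<String, Integer> counts = new HashMap<String, Integer>();

  public void execute(Tuple tuple, BasicOutputCollector collector) {
    // Count in memory only; no file I/O on the hot path.
    String word = tuple.getString(0);
    Integer count = counts.get(word);
    count = (count == null) ? 1 : count + 1;
    counts.put(word, count);
    collector.emit(new Values(word, count));
  }

  // Called once on shutdown in local mode; dump the final counts.
  public void cleanup() {
    try (BufferedWriter output = new BufferedWriter(
        new FileWriter("/home/hadoop/wordcounts.txt", false))) {
      for (Map.Entry<String, Integer> entry : counts.entrySet()) {
        output.write(entry.getKey() + ": " + entry.getValue());
        output.newLine();
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  public void declareOutputFields(OutputFieldsDeclarer declarer) {
    declarer.declare(new Fields("word", "count"));
  }
}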

First, prepare a text file as the input source. I put one text file, wordinput.txt, under /home/hadoop/ on datanode01 and datanode02.
Start ZooKeeper (zkServer.sh start on namenode, datanode01, and datanode02).
Start storm nimbus on namenode.
Start storm supervisor on datanode01 and datanode02.
Then, on namenode:
cd /home/hadoop/workspace
cd stormsample
mvn install
storm jar storm-example-0.0.1-SNAPSHOT.jar stormdemo.WordCountTopology /home/hadoop/wordinput.txt wordcount
After the job ran, I found wordcounts.txt on datanode01 (the file appears on whichever supervisor node runs the count bolt).
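For example, given a made-up wordinput.txt containing:

the quick brown fox
The lazy dog

the topology writes a wordcounts.txt like this (line order depends on HashMap iteration):

the: 2
quick: 1
brown: 1
fox: 1
lazy: 1
dog: 1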
