Storm---DirectGroup(直接分组)
以单词分割计数为例实现Storm的DirectGroup分组:
1、Spout实现
Spout是Storm数据源头,使用DirectGroup方式将Spout数据发送指定的Bolt,需注意:
1)、需要获取消费该Spout数据的Bolt的Task Id(Task是运行在Executor中的组件实例,每个Task有唯一的Id),如下代码在Spout.open()初始化时获取这些Task Id
2)、需使用SpoutOutputCollector.emitDirect()方法
3)、将Spout声明为直接流,即在Spout.declareOutputFields()声明
/**
* Fixed Cycle Spout
*
* @author hanhan.zhang
* */
public class FixedCycleSpout implements IRichSpout {

    /** Name of the single output field declared on the stream. */
    private String _fieldName;

    /** Whether the output stream is declared, and emitted to, as a direct stream. */
    private boolean _direct;

    /** Id of the only stream this spout declares. */
    private String _streamId;

    /** Position in {@link #_sendTuple} of the next tuple to emit. */
    private int _index;

    /** key = msgId, value = tuple emitted under that id that has not been acked yet. */
    private Map<String, List<Object>> _pendingTuple;

    /** Fixed set of tuples that is emitted over and over, in order. */
    private List<Object>[] _sendTuple;

    private SpoutOutputCollector _collector;

    private CountMetric _sendMetric;

    private CountMetric _failMetric;

    /** Task ids of all components subscribed with direct grouping; only filled when direct. */
    private List<Integer> _consumeTaskIdList;

    /**
     * @param _streamId  id of the stream to declare and emit on
     * @param _fieldName name of the single field carried by every tuple
     * @param _direct    {@code true} to declare the stream as direct and use emitDirect
     * @param _sendTuple tuples to cycle through
     */
    public FixedCycleSpout(String _streamId, String _fieldName, boolean _direct, List<Object>... _sendTuple) {
        this._streamId = _streamId;
        this._fieldName = _fieldName;
        this._direct = _direct;
        this._sendTuple = _sendTuple;
    }

    /**
     * Resets the cycle, registers the send/fail metrics and, in direct mode, collects the
     * task ids of every component that subscribed to this spout with direct grouping.
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this._index = 0;
        this._pendingTuple = Maps.newHashMap();

        // register metric
        this._sendMetric = context.registerMetric("cycle.spout.send.tuple.metric", new CountMetric(), 60);
        this._failMetric = context.registerMetric("cycle.spout.fail.tuple.metric", new CountMetric(), 60);

        this._collector = collector;

        // get consume task id
        if (this._direct) {
            this._consumeTaskIdList = Lists.newLinkedList();
            // outer key = stream id, inner key = consuming component id
            Map<String, Map<String, Grouping>> consumeTargets = context.getThisTargets();
            if (consumeTargets != null && !consumeTargets.isEmpty()) {
                consumeTargets.forEach((streamId, target) -> {
                    if (target != null && !target.isEmpty()) {
                        target.forEach((componentId, group) -> {
                            if (group.is_set_direct()) {
                                this._consumeTaskIdList.addAll(context.getComponentTasks(componentId));
                            }
                        });
                    }
                });
            }
        }
    }

    @Override
    public void close() {
    }

    @Override
    public void activate() {
    }

    @Override
    public void deactivate() {
    }

    /**
     * Emits the next tuple of the fixed cycle under a freshly generated message id.
     * Does nothing when the spout was constructed without any tuple.
     */
    @Override
    public void nextTuple() {
        // Guard: indexing an empty array would throw ArrayIndexOutOfBoundsException.
        if (this._sendTuple == null || this._sendTuple.length == 0) {
            return;
        }
        this._sendMetric.incr();
        if (this._index >= this._sendTuple.length) {
            this._index = 0;
        }
        String msgId = UUID.randomUUID().toString();
        List<Object> tuple = this._sendTuple[this._index++];
        sendTuple(msgId, tuple);
    }

    /** Forgets the tuple that was pending under {@code msgId}. */
    @Override
    public void ack(Object msgId) {
        String msgIdStr = (String) msgId;
        System.out.println("ack tuple with msgId " + msgIdStr);
        this._pendingTuple.remove(msgIdStr);
    }

    /**
     * Counts the failure and re-emits the tuple that is pending under {@code msgId}.
     * If nothing is pending for that id (e.g. it was already acked and removed) there is
     * nothing to replay, so the call only records the failure.
     */
    @Override
    public void fail(Object msgId) {
        this._failMetric.incr();
        String msgIdStr = (String) msgId;
        System.out.println("fail tuple with msgId " + msgIdStr);
        List<Object> pending = this._pendingTuple.get(msgIdStr);
        if (pending == null) {
            // Re-emitting a null tuple would be an error; skip the replay.
            return;
        }
        sendTuple(msgIdStr, pending);
    }

    /** Declares the single output stream, marked direct when this spout is in direct mode. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream(this._streamId, this._direct, new Fields(_fieldName));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    /**
     * Records {@code tuple} as pending under {@code msgId} and emits it on the declared stream.
     * In direct mode the tuple is emitted once to every collected consumer task, each time
     * with the same message id.
     *
     * @throws IllegalStateException in direct mode when no direct consumer task was found
     */
    protected void sendTuple(String msgId, List<Object> tuple) {
        this._pendingTuple.put(msgId, tuple);
        if (this._direct) {
            if (this._consumeTaskIdList == null || this._consumeTaskIdList.isEmpty()) {
                throw new IllegalStateException("direct task is empty !");
            }
            this._consumeTaskIdList.forEach(taskId ->
                    this._collector.emitDirect(taskId, this._streamId, tuple, msgId));
        } else {
            // declareOutputFields only declares _streamId, so emit on that stream; the
            // stream-less emit overload would target a stream this spout never declares.
            this._collector.emit(this._streamId, tuple, msgId);
        }
    }
}
2、Bolt实现
/**
* Sentence Split Bolt
*
* @author hanhan.zhang
* */
public class SentenceSplitBolt implements IRichBolt {

    private OutputCollector _collector;

    private CountMetric _ackMetric;

    private CountMetric _failMetric;

    /** Regex passed to String.split; read from the topology config under Const.SEPARATOR. */
    private String _separator;

    /** Whether the output stream is declared, and emitted to, as a direct stream. */
    private boolean _direct;

    /** Id of the only stream this bolt declares. */
    private String _streamId;

    /**
     * One entry per component subscribed to {@link #_streamId} with direct grouping, holding
     * that component's task ids. Only populated in direct mode. Fully qualified type names are
     * used so this class does not depend on extra imports.
     */
    private java.util.List<java.util.List<Integer>> _directTaskGroups;

    public SentenceSplitBolt(String _streamId, boolean _direct) {
        this._streamId = _streamId;
        this._direct = _direct;
    }

    /**
     * Stores the collector, registers the ack/fail metrics, reads the separator from the
     * topology config and, in direct mode, resolves the consumer tasks to emit to.
     *
     * @param stormConf topology configuration; Const.SEPARATOR is read from it
     * @param context   used to register metrics and to look up the consumers of this bolt
     * @param collector used to emit, ack and fail tuples
     * @throws IllegalStateException in direct mode when no component subscribed to the
     *                               stream with direct grouping
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this._collector = collector;
        // register metric for monitor
        this._ackMetric = context.registerMetric("sentence.split.ack.metric", new CountMetric(), 60);
        this._failMetric = context.registerMetric("sentence.split.fail.metric", new CountMetric(), 60);
        this._separator = (String) stormConf.get(Const.SEPARATOR);

        if (this._direct) {
            this._directTaskGroups = findDirectTaskGroups(context);
            if (this._directTaskGroups.isEmpty()) {
                throw new IllegalStateException("direct task is empty !");
            }
        }
    }

    /** Collects, per directly-subscribed component of this bolt's stream, its task ids. */
    private java.util.List<java.util.List<Integer>> findDirectTaskGroups(TopologyContext context) {
        java.util.List<java.util.List<Integer>> groups = new java.util.ArrayList<>();
        if (context.getThisTargets() == null) {
            return groups;
        }
        // outer key = stream id, inner key = consuming component id
        context.getThisTargets().forEach((streamId, consumers) -> {
            if (streamId == null || !streamId.equals(this._streamId) || consumers == null) {
                return;
            }
            consumers.forEach((componentId, grouping) -> {
                if (grouping.is_set_direct()) {
                    java.util.List<Integer> tasks = context.getComponentTasks(componentId);
                    if (tasks != null && !tasks.isEmpty()) {
                        groups.add(tasks);
                    }
                }
            });
        });
        return groups;
    }

    /**
     * Splits the sentence in field 0 and emits one anchored ("word", 1) tuple per piece, then
     * acks the input. An empty sentence produces no output but is still acked so it is not
     * left pending. Any exception fails the input tuple.
     */
    @Override
    public void execute(Tuple input) {
        try {
            String sentence = input.getString(0);
            if (!Strings.isNullOrEmpty(sentence)) {
                for (String field : sentence.split(_separator)) {
                    emitWord(input, field);
                }
            }
            this._collector.ack(input);
            this._ackMetric.incr();
        } catch (Exception e) {
            System.out.println("fail to split tuple : " + e);
            this._collector.fail(input);
            this._failMetric.incr();
        }
    }

    /**
     * Emits one word anchored to {@code input}. In direct mode each subscribed component
     * receives the word at the task selected by the word's hash, so equal words always reach
     * the same task of a component.
     */
    private void emitWord(Tuple input, String field) {
        if (!this._direct) {
            this._collector.emit(this._streamId, input, new Values(field, 1));
            return;
        }
        for (java.util.List<Integer> tasks : this._directTaskGroups) {
            // A direct emit must name a consumer task, not this bolt's own task id.
            int taskId = tasks.get(Math.floorMod(field.hashCode(), tasks.size()));
            this._collector.emitDirect(taskId, this._streamId, input, new Values(field, 1));
        }
    }

    @Override
    public void cleanup() {
    }

    /** Declares the single ("word", "count") stream, marked direct when in direct mode. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream(this._streamId, this._direct, new Fields("word", "count"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
/**
* Word Sum Bolt
*
* @author hanhan.zhang
* */
public class WordSumBolt extends BaseRichBolt {

    private OutputCollector _collector;

    private int _taskId;

    /** key = word, value = running sum; entries are printed by the removal listener. */
    private Cache<String, AtomicInteger> _wordCache;

    /**
     * Stores the collector and builds the word cache (at most 1024 entries, expiring
     * 3 seconds after write) whose removal listener prints each removed word and its sum.
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this._collector = collector;
        this._taskId = context.getThisTaskId();
        this._wordCache = CacheBuilder.newBuilder()
                .maximumSize(1024)
                .expireAfterWrite(3, TimeUnit.SECONDS)
                .removalListener((removalNotification) -> {
                    String key = (String) removalNotification.getKey();
                    AtomicInteger sum = (AtomicInteger) removalNotification.getValue();
                    System.out.println("word sum result : [" + key + "," + sum.get() + "]");
                })
                .build();
    }

    /**
     * Adds the count in field 1 to the running sum of the word in field 0, then acks the
     * input. An empty word is not counted but is still acked so it is not left pending.
     * Any exception fails the input tuple.
     */
    @Override
    public void execute(Tuple input) {
        try {
            String word = input.getString(0);
            int count = input.getInteger(1);
            if (!Strings.isEmpty(word)) {
                AtomicInteger counter = this._wordCache.getIfPresent(word);
                if (counter == null) {
                    this._wordCache.put(word, new AtomicInteger(count));
                } else {
                    counter.addAndGet(count);
                }
            }
            this._collector.ack(input);
        } catch (Exception e) {
            this._collector.fail(input);
        }
    }

    /** This bolt emits nothing, so no stream is declared. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }
}
3、Storm运行
/**
* Tuple Split-Flow Topology
*
* @author hanhan.zhang
* */
public class FlowTopology {

    /**
     * Builds the spout -> split bolt -> sum bolt topology and submits it to a LocalCluster
     * under the name "flowTopology".
     */
    public static void main(String[] args) {
        // sentences the spout cycles through
        List<Object>[] sentences = new List[] {
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"),
                new Values("how many apples can you eat")
        };

        // stream names
        final String spoutStream = "topology.flow.cycle.spout.stream";
        final String splitStream = "topology.flow.split.bolt.stream";

        // component ids
        final String spoutId = "sentence.cycle.spout";
        final String splitBoltId = "sentence.split.bolt";
        final String sumBoltId = "word.sum.bolt";

        // the spout emits on a direct stream, the split bolt on a regular one
        FixedCycleSpout sentenceSpout = new FixedCycleSpout(spoutStream, "sentence", true, sentences);
        SentenceSplitBolt sentenceSplitter = new SentenceSplitBolt(splitStream, false);
        WordSumBolt wordSummer = new WordSumBolt();

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout(spoutId, sentenceSpout, 1);
        builder.setBolt(splitBoltId, sentenceSplitter, 1)
                .directGrouping(spoutId, spoutStream);
        builder.setBolt(sumBoltId, wordSummer, 3)
                .fieldsGrouping(splitBoltId, splitStream, new Fields("word"));

        Config topologyConfig = new Config();
        topologyConfig.put(Const.SEPARATOR, " ");

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("flowTopology", topologyConfig, builder.createTopology());
    }
}
Storm---DirectGroup(直接分组)的更多相关文章
- 简单聊聊Storm的流分组策略
简单聊聊Storm的流分组策略 首先我要强调的是,Storm的分组策略对结果有着直接的影响,不同的分组的结果一定是不一样的.其次,不同的分组策略对资源的利用也是有着非常大的不同,本文主要讲一讲loca ...
- Storm Topology及分组原理
Storm的通信机制,需要满足如下一些条件以满足Storm的语义. 1.建立数据传输的缓冲区.在通信连接没有建立之前把发送的数据缓存起来.数据发送方可以在连接建立之前发送消息,而不需要等连接建立起来, ...
- Storm Grouping —— 流分组策略
Storm Grouping: Shuffle Grouping :随机分组,尽量均匀分布到下游Bolt中 将流分组定义为混排.这种混排分组意味着来自Spout的输入将混排,或随机分发给此Bolt中的 ...
- storm的流分组
用的是ShuffleGrouping分组方式,并行度设置为3 这是跑下来的结果 参考代码StormTopologyShufferGrouping.java package yehua.storm; i ...
- 大数据量场景下storm自定义分组与Hbase预分区完美结合大幅度节省内存空间
前言:在系统中向hbase中插入数据时,常常通过设置region的预分区来防止大数据量插入的热点问题,提高数据插入的效率,同时可以减少当数据猛增时由于Region split带来的资源消耗.大量的预分 ...
- storm自定义分组与Hbase预分区结合节省内存消耗
Hbas预分区 在系统中向hbase中插入数据时,常常通过设置region的预分区来防止大数据量插入的热点问题,提高数据插入的效率,同时可以减少当数据猛增时由于Region split带来的资源消耗. ...
- storm Tutorial 的解读 + 个人理解
参考链接: Tutorial storm Tutorial 中文解读+分析 导读.摘要: .hadoop有master与slave,Storm与之对应的节点是什么? .Storm控制节点上面运行一个后 ...
- [转载] 使用 Twitter Storm 处理实时的大数据
转载自http://www.ibm.com/developerworks/cn/opensource/os-twitterstorm/ 流式处理大数据简介 Storm 是一个开源的.大数据处理系统,与 ...
- Storm日志分析调研及其实时架构
1.Storm第一个Demo 2.Windows下基于eclipse的Storm应用开发与调试 3.Storm实例+mysql数据库保存 4.Storm原理介绍 5. flume+kafka+stor ...
- Storm知识点
1. 离线计算是什么? 离线计算:批量获取数据.批量传输数据.周期性批量计算数据.数据展示 代表技术:Sqoop批量导入数据.HDFS批量存储数据.MapReduce批量计算数据.Hive批量计算数据 ...
随机推荐
- 四十七 常用内建模块 XML
XML虽然比JSON复杂,在Web中应用也不如以前多了,不过仍有很多地方在用,所以,有必要了解如何操作XML. DOM vs SAX 操作XML有两种方法:DOM和SAX.DOM会把整个XML读入内存 ...
- nginxhttp请求限制丶tcp会话限制和下载速度限制
(1)nginx请求限制 ngx_http_limit_req_module:开启对单个ip丶单个会话在单位时间内请求的限制rate表示限制的速率 1.修改nginx配置文件 #vim /usr/lo ...
- oracle 自己改了 spfile 导致起不来
oracle pfile 出错 今天在升级 oracle 内存的时候参数调错了,导致 oracle 起不来, 情急之下用 vim 修改了 spfile 文件,结果由于该文件是二进制的,不能直接修改,所 ...
- OOX之间的关系
OOA,OOD,OOP三者关系OOA的分析结果可以作为OOD的需求模型OOD的设计结果作为OOP的指导蓝图OOP负责最终实现目标系统
- Bzoj1015/洛谷P1197 [JSOI2008]星球大战(并查集)
题面 Bzoj 洛谷 题解 考虑离线做法,逆序处理,一个一个星球的加入.用并查集维护一下连通性就好了. 具体来说,先将被消灭的星球储存下来,先将没有被消灭的星球用并查集并在一起,这样做可以路径压缩,然 ...
- Python开发基础-Day22反射、面向对象进阶
isinstance(obj,cls)和issubclass(sub,super) isinstance(obj,cls)检查是否obj是否是类 cls 的对象,如果是返回True class Foo ...
- 求高精度幂(poj1001)
Description Problems involving the computation of exact values of very large magnitude and precision ...
- EasyUI学习总结(四)——parser源码分析(转载)
本文转载自:http://www.cnblogs.com/xdp-gacl/p/4082561.html parser模块是easyloader第一个加载的模块,它的主要作用,就是扫描页面上easyu ...
- 原型开发工具 mockplus
韩梦飞沙 韩亚飞 313134555@qq.com yue31313 han_meng_fei_sha 原型开发工具 mockplus 微信(演示) - Mockup Plus Web Ap ...
- 【BZOJ 1018】【SHOI 2008】堵塞的交通traffic
http://www.lydsy.com/JudgeOnline/problem.php?id=1018 线段树维护连通性. 把每一列看成一个节点,对于线段树上的每一个节点,维护8个信息,前6个字面意 ...