Storm---DirectGroup(直接分组)
以单词分割计数为例实现Storm的DirectGroup分组:
1、Spout实现
Spout是Storm的数据源头,使用DirectGroup方式时由Spout自己决定把数据发送到下游Bolt的哪个Task,需注意:
1)、需要先拿到消费该Spout的Bolt的Task Id(Task是运行在Executor线程中的组件实例,每个Task有唯一的编号),如下代码在Spout.open()初始化时拿到消费者Task
2)、需使用SpoutOutputCollector.emitDirect()方法
3)、将Spout的输出流声明为直接流(direct stream),即在Spout.declareOutputFields()中调用declareStream()并把direct参数设为true
/**
 * Spout that cycles forever over a fixed array of tuples.
 *
 * <p>Each emitted tuple is tagged with a random UUID message id and kept in a pending map until
 * it is acked, so that it can be re-emitted from {@link #fail(Object)}.
 *
 * <p>When constructed with {@code _direct == true} the output stream is declared as a direct
 * stream and every tuple is sent with {@code emitDirect} to each task of every component that
 * subscribed to this spout's stream with a direct grouping. Otherwise tuples are emitted normally
 * on the declared stream.
 *
 * <p>NOTE(review): in direct mode the same message id is used for the emit to every consumer
 * task, so the pending entry is dropped on the first ack and a replay re-sends to all consumer
 * tasks. That is harmless with a single consumer task (as in the demo topology); confirm before
 * using with more than one.
 *
 * @author hanhan.zhang
 */
public class FixedCycleSpout implements IRichSpout {

    /** Name of the single output field. */
    private String _fieldName;

    /** Whether the output stream is declared (and emitted to) as a direct stream. */
    private boolean _direct;

    /** Id of the stream this spout declares and emits on. */
    private String _streamId;

    /** Index of the next tuple in {@link #_sendTuple} to emit. */
    private int _index;

    /** key = msgId, value = tuple sent under that message id and not yet acked. */
    private Map<String, List<Object>> _pendingTuple;

    /** Tuples that are emitted in order, wrapping around at the end. */
    private List<Object>[] _sendTuple;

    private SpoutOutputCollector _collector;

    private CountMetric _sendMetric;

    private CountMetric _failMetric;

    /** Task ids of the direct-grouping consumers of {@link #_streamId}; only set in direct mode. */
    private List<Integer> _consumeTaskIdList;

    /**
     * @param _streamId  id of the output stream to declare and emit on
     * @param _fieldName name of the single output field
     * @param _direct    {@code true} to declare the stream as direct and use {@code emitDirect}
     * @param _sendTuple tuples to cycle through
     */
    public FixedCycleSpout(String _streamId, String _fieldName, boolean _direct, List<Object>... _sendTuple) {
        this._streamId = _streamId;
        this._fieldName = _fieldName;
        this._direct = _direct;
        this._sendTuple = _sendTuple;
    }

    /**
     * Resets the cycle, registers the send/fail metrics and, in direct mode, collects the task
     * ids of all components that consume this spout's stream with a direct grouping.
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this._index = 0;
        this._pendingTuple = Maps.newHashMap();

        // register metric
        this._sendMetric = context.registerMetric("cycle.spout.send.tuple.metric", new CountMetric(), 60);
        this._failMetric = context.registerMetric("cycle.spout.fail.tuple.metric", new CountMetric(), 60);

        this._collector = collector;

        // get consume task id
        if (this._direct) {
            this._consumeTaskIdList = Lists.newArrayList();
            Map<String, Map<String, Grouping>> consumeTargets = context.getThisTargets();
            // Only look at subscribers of the stream this spout actually emits on.
            Map<String, Grouping> target = consumeTargets == null ? null : consumeTargets.get(this._streamId);
            if (target != null) {
                // componentId = consume target component Id
                target.forEach((componentId, group) -> {
                    if (group.is_set_direct()) {
                        this._consumeTaskIdList.addAll(context.getComponentTasks(componentId));
                    }
                });
            }
        }
    }

    @Override
    public void close() {
    }

    @Override
    public void activate() {
    }

    @Override
    public void deactivate() {
    }

    /** Emits the next tuple of the cycle under a fresh UUID message id. */
    @Override
    public void nextTuple() {
        // Nothing to cycle over: avoid indexing into an empty array.
        if (this._sendTuple == null || this._sendTuple.length == 0) {
            return;
        }
        if (this._index >= this._sendTuple.length) {
            this._index = 0;
        }
        this._sendMetric.incr();
        String msgId = UUID.randomUUID().toString();
        List<Object> tuple = this._sendTuple[this._index++];
        sendTuple(msgId, tuple);
    }

    /** Drops the acked tuple from the pending map. */
    @Override
    public void ack(Object msgId) {
        String msgIdStr = (String) msgId;
        System.out.println("ack tuple with msgId " + msgIdStr);
        this._pendingTuple.remove(msgIdStr);
    }

    /** Counts the failure and re-sends the pending tuple under the same message id. */
    @Override
    public void fail(Object msgId) {
        this._failMetric.incr();
        String msgIdStr = (String) msgId;
        System.out.println("fail tuple with msgId " + msgIdStr);
        List<Object> tuple = this._pendingTuple.get(msgIdStr);
        if (tuple == null) {
            // No pending entry for this id (e.g. already removed by an ack): nothing to replay.
            return;
        }
        sendTuple(msgIdStr, tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream(this._streamId, this._direct, new Fields(_fieldName));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    /**
     * Records the tuple as pending and emits it.
     *
     * @param msgId message id used for ack/fail tracking
     * @param tuple values to emit
     * @throws IllegalStateException in direct mode when no direct consumer task was found
     */
    protected void sendTuple(String msgId, List<Object> tuple) {
        this._pendingTuple.put(msgId, tuple);
        if (this._direct) {
            if (this._consumeTaskIdList == null || this._consumeTaskIdList.isEmpty()) {
                throw new IllegalStateException("direct task is empty !");
            }
            this._consumeTaskIdList.forEach(taskId ->
                    this._collector.emitDirect(taskId, this._streamId, tuple, msgId));
        } else {
            // Emit on the declared stream; the default stream is never declared by this spout.
            this._collector.emit(this._streamId, tuple, msgId);
        }
    }
}
2、Bolt实现
/**
 * Bolt that splits the sentence in field 0 of each input tuple and emits one
 * {@code ("word", "count")} tuple per non-empty token, with a count of 1, anchored to the input.
 *
 * <p>When constructed with {@code _direct == true} the output stream is declared as a direct
 * stream and each word is sent with {@code emitDirect} to one of the tasks that subscribed to
 * this bolt's stream with a direct grouping; otherwise words are emitted normally on the stream.
 *
 * @author hanhan.zhang
 */
public class SentenceSplitBolt implements IRichBolt {

    private OutputCollector _collector;

    private CountMetric _ackMetric;

    private CountMetric _failMetric;

    /** Separator passed to {@link String#split(String)}, read from the topology configuration. */
    private String _separator;

    /** Id of the task this bolt instance runs as; used in failure messages. */
    private int _taskId;

    private boolean _direct;

    private String _streamId;

    /** Task ids of the direct-grouping consumers of {@link #_streamId}; only set in direct mode. */
    private List<Integer> _consumeTaskIdList;

    /**
     * @param _streamId id of the output stream to declare and emit on
     * @param _direct   {@code true} to declare the stream as direct and use {@code emitDirect}
     */
    public SentenceSplitBolt(String _streamId, boolean _direct) {
        this._streamId = _streamId;
        this._direct = _direct;
    }

    /**
     * Registers the ack/fail metrics, reads the separator from the configuration and, in direct
     * mode, collects the task ids of the direct-grouping consumers of this bolt's stream.
     *
     * @param stormConf topology configuration; {@code Const.SEPARATOR} supplies the separator,
     *                  a single space is used when it is absent
     * @param context   used to register metrics and to look up consumer tasks
     * @param collector used to emit, ack and fail tuples
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this._collector = collector;
        // register metric for monitor
        this._ackMetric = context.registerMetric("sentence.split.ack.metric", new CountMetric(), 60);
        this._failMetric = context.registerMetric("sentence.split.fail.metric", new CountMetric(), 60);
        this._taskId = context.getThisTaskId();

        Object separator = stormConf.get(Const.SEPARATOR);
        this._separator = separator == null ? " " : (String) separator;

        if (this._direct) {
            this._consumeTaskIdList = Lists.newArrayList();
            Map<String, Map<String, Grouping>> consumeTargets = context.getThisTargets();
            Map<String, Grouping> target = consumeTargets == null ? null : consumeTargets.get(this._streamId);
            if (target != null) {
                target.forEach((componentId, group) -> {
                    if (group.is_set_direct()) {
                        this._consumeTaskIdList.addAll(context.getComponentTasks(componentId));
                    }
                });
            }
        }
    }

    /**
     * Splits the sentence, emits its words and acks the input. A null or empty sentence is acked
     * without emitting anything so that it does not time out upstream. Any exception fails the
     * input tuple.
     */
    @Override
    public void execute(Tuple input) {
        try {
            String sentence = input.getString(0);
            if (!Strings.isNullOrEmpty(sentence)) {
                for (String field : sentence.split(_separator)) {
                    // Consecutive separators produce empty tokens; they carry no word.
                    if (!field.isEmpty()) {
                        emitWord(input, field);
                    }
                }
            }
            this._collector.ack(input);
            this._ackMetric.incr();
        } catch (Exception e) {
            System.err.println("sentence split task " + this._taskId + " fail tuple : " + e);
            this._collector.fail(input);
            this._failMetric.incr();
        }
    }

    @Override
    public void cleanup() {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream(this._streamId, this._direct, new Fields("word", "count"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    /**
     * Emits {@code (word, 1)} anchored to {@code anchor}. In direct mode the consumer task is
     * chosen from the word's hash code, so one word always maps to the same entry of the
     * consumer task list.
     *
     * @throws IllegalStateException in direct mode when no direct consumer task was found
     */
    private void emitWord(Tuple anchor, String word) {
        if (!this._direct) {
            this._collector.emit(this._streamId, anchor, new Values(word, 1));
            return;
        }
        if (this._consumeTaskIdList == null || this._consumeTaskIdList.isEmpty()) {
            throw new IllegalStateException("direct task is empty !");
        }
        // floorMod keeps the index non-negative for negative hash codes.
        int slot = Math.floorMod(word.hashCode(), this._consumeTaskIdList.size());
        this._collector.emitDirect(this._consumeTaskIdList.get(slot), this._streamId, anchor, new Values(word, 1));
    }
}

/**
 * Terminal bolt that sums the counts received per word.
 *
 * <p>Sums are held in a cache limited to 1024 entries whose entries expire 3 seconds after being
 * written; whenever the cache removes an entry the word and its sum are printed to stdout.
 *
 * @author hanhan.zhang
 */
public class WordSumBolt extends BaseRichBolt {

    private OutputCollector _collector;

    /** key = word, value = running sum of the counts received for that word. */
    private Cache<String, AtomicInteger> _wordCache;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this._collector = collector;
        this._wordCache = CacheBuilder.newBuilder()
                .maximumSize(1024)
                .expireAfterWrite(3, TimeUnit.SECONDS)
                .removalListener((removalNotification) -> {
                    String key = (String) removalNotification.getKey();
                    AtomicInteger sum = (AtomicInteger) removalNotification.getValue();
                    System.out.println("word sum result : [" + key + "," + sum.get() + "]");
                })
                .build();
    }

    /**
     * Adds the count in field 1 to the sum of the word in field 0 and acks the input. A null or
     * empty word is acked without being counted so that it does not time out upstream. Any
     * exception fails the input tuple.
     */
    @Override
    public void execute(Tuple input) {
        try {
            String word = input.getString(0);
            int count = input.getInteger(1);
            if (word != null && !word.isEmpty()) {
                AtomicInteger counter = this._wordCache.getIfPresent(word);
                if (counter == null) {
                    this._wordCache.put(word, new AtomicInteger(count));
                } else {
                    counter.addAndGet(count);
                }
            }
            this._collector.ack(input);
        } catch (Exception e) {
            System.err.println("word sum fail tuple : " + e);
            this._collector.fail(input);
        }
    }

    /** Declares nothing: this bolt emits no tuples. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }
}
3、Storm运行
/**
 * Entry point of the sentence split / word sum demo topology.
 *
 * <p>A {@code FixedCycleSpout} sends sentences over a direct stream to a single
 * {@code SentenceSplitBolt}, whose words are routed by a fields grouping on "word" to three
 * {@code WordSumBolt} tasks. The topology is submitted to a local cluster.
 *
 * @author hanhan.zhang
 */
public class FlowTopology {

    public static void main(String[] args) {
        // stream name
        String spoutStreamId = "topology.flow.cycle.spout.stream";
        String splitStreamId = "topology.flow.split.bolt.stream";

        TopologyBuilder builder = wireComponents(spoutStreamId, splitStreamId);

        // The split bolt reads its separator from the topology configuration.
        Config stormConfig = new Config();
        stormConfig.put(Const.SEPARATOR, " ");

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("flowTopology", stormConfig, builder.createTopology());
    }

    /** Creates the spout and both bolts and connects them in a new builder. */
    private static TopologyBuilder wireComponents(String spoutStreamId, String splitStreamId) {
        // send tuple
        List<Object>[] sentences = new List[] {
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"),
                new Values("how many apples can you eat")
        };

        // spout
        FixedCycleSpout sentenceSpout = new FixedCycleSpout(spoutStreamId, "sentence", true, sentences);

        // bolt
        SentenceSplitBolt splitter = new SentenceSplitBolt(splitStreamId, false);
        WordSumBolt summer = new WordSumBolt();

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("sentence.cycle.spout", sentenceSpout, 1);
        builder.setBolt("sentence.split.bolt", splitter, 1)
                .directGrouping("sentence.cycle.spout", spoutStreamId);
        builder.setBolt("word.sum.bolt", summer, 3)
                .fieldsGrouping("sentence.split.bolt", splitStreamId, new Fields("word"));
        return builder;
    }
}
Storm---DirectGroup(直接分组)的更多相关文章
- 简单聊聊Storm的流分组策略
简单聊聊Storm的流分组策略 首先我要强调的是,Storm的分组策略对结果有着直接的影响,不同的分组的结果一定是不一样的.其次,不同的分组策略对资源的利用也是有着非常大的不同,本文主要讲一讲loca ...
- Storm Topology及分组原理
Storm的通信机制,需要满足如下一些条件以满足Storm的语义. 1.建立数据传输的缓冲区.在通信连接没有建立之前把发送的数据缓存起来.数据发送方可以在连接建立之前发送消息,而不需要等连接建立起来, ...
- Storm Grouping —— 流分组策略
Storm Grouping: Shuffle Grouping :随机分组,尽量均匀分布到下游Bolt中 将流分组定义为混排.这种混排分组意味着来自Spout的输入将混排,或随机分发给此Bolt中的 ...
- storm的流分组
用的是ShuffleGrouping分组方式,并行度设置为3 这是跑下来的结果 参考代码StormTopologyShufferGrouping.java package yehua.storm; i ...
- 大数据量场景下storm自定义分组与Hbase预分区完美结合大幅度节省内存空间
前言:在系统中向hbase中插入数据时,常常通过设置region的预分区来防止大数据量插入的热点问题,提高数据插入的效率,同时可以减少当数据猛增时由于Region split带来的资源消耗.大量的预分 ...
- storm自定义分组与Hbase预分区结合节省内存消耗
Hbas预分区 在系统中向hbase中插入数据时,常常通过设置region的预分区来防止大数据量插入的热点问题,提高数据插入的效率,同时可以减少当数据猛增时由于Region split带来的资源消耗. ...
- storm Tutorial 的解读 + 个人理解
参考链接: Tutorial storm Tutorial 中文解读+分析 导读.摘要: .hadoop有master与slave,Storm与之对应的节点是什么? .Storm控制节点上面运行一个后 ...
- [转载] 使用 Twitter Storm 处理实时的大数据
转载自http://www.ibm.com/developerworks/cn/opensource/os-twitterstorm/ 流式处理大数据简介 Storm 是一个开源的.大数据处理系统,与 ...
- Storm日志分析调研及其实时架构
1.Storm第一个Demo 2.Windows下基于eclipse的Storm应用开发与调试 3.Storm实例+mysql数据库保存 4.Storm原理介绍 5. flume+kafka+stor ...
- Storm知识点
1. 离线计算是什么? 离线计算:批量获取数据.批量传输数据.周期性批量计算数据.数据展示 代表技术:Sqoop批量导入数据.HDFS批量存储数据.MapReduce批量计算数据.Hive批量计算数据 ...
随机推荐
- 对TDD原则的理解
1,在编写好失败的单元测试之前,不要编写任何产品代码 如果不先写测试,那么各个函数就会耦合在一起,最后变得无法测试 如果后写测试,你也许能对大块大块的代码进行测试,但是无法对每个函数进行测 ...
- Luogu P4148 简单题(K-D Tree)
题面 题解 因为强制在线,所以我们不能$cdq$分治,所以考虑用$KDT$,$KDT$维护一个矩阵,然后询问的时候如果当前矩形在询问区间内,直接记贡献,否则判断当前点是否在矩阵内,然后左右分别递归下去 ...
- 叙Windows平台下基于MBR和UEFI的bootkit(一)--以MBR为例
安全的对抗首先在权限方面,权限高的进程对权限低的权限就是就是降维打击,无往不利.当权限相同时,启动得早便为王.所谓的bootkit也就是基于这个思路设计的一种复杂病毒.它优先于Windows系统启动, ...
- 【BZOJ 1052】 1052: [HAOI2007]覆盖问题 (乱搞)
1052: [HAOI2007]覆盖问题 Description 某人在山上种了N棵小树苗.冬天来了,温度急速下降,小树苗脆弱得不堪一击,于是树主人想用一些塑料薄 膜把这些小树遮盖起来,经过一番长久的 ...
- POJ 2960 S-Nim 博弈论 sg函数
http://poj.org/problem?id=2960 sg函数几乎是模板题. 调试代码的最大障碍仍然是手残在循环里打错变量名,是时候换个hydra产的机械臂了[超想要.jpg] #includ ...
- BZOJ 1053 [HAOI2007]反素数ant(约数个数)
[题目链接] http://www.lydsy.com/JudgeOnline/problem.php?id=1053 [题目大意] 于任何正整数x,其约数的个数记作g(x).例如g(1)=1.g(6 ...
- 【模拟退火】Petrozavodsk Winter Training Camp 2017 Day 1: Jagiellonian U Contest, Monday, January 30, 2017 Problem F. Factory
让你在平面上取一个点,使得其到给定的所有点的距离和最小. 就是“费马点”. 模拟退火……日后学习一下,这是从网上扒的,先存下. #include<iostream> #include< ...
- Problem A: 自定义函数strcomp(),实现两个字符串的比较
#include<stdio.h> int strcmp(char *str1,char *str2) { if(str1!=NULL&&str2!=NULL) { whi ...
- c#版 mqtt 3.1.1 client 实现
c# 版 mqtt 3.1.1 client http://docs.oasis-open.org/mqtt/mqtt/v3.1.1/mqtt-v3.1.1.html 上面为 3.1.1 协议报文 一 ...
- OM-销售订单行【订购项目】配置参数文件控制
ONT_RESTRICT_CUST_ITEMS OM:限制行层收货地址的客户项目 ONT_USE_MVIEW_FOR_ITEMS_LOV OM:为项目值列表使用物化视图(遵守项目可订购性规则)