storm的流分组

用的是ShuffleGrouping分组方式,并行度设置为3


这是跑下来的结果

参考代码StormTopologyShufferGrouping.java
package yehua.storm;
import java.util.Map;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
/**
* shufferGrouping
* 没有特殊情况下,就使用这个分组方式,可以保证负载均衡,工作中最常用的
* @author yehua
*
*/
public class StormTopologyShufferGrouping {
public static class MySpout extends BaseRichSpout{
private Map conf;
private TopologyContext context;
private SpoutOutputCollector collector;
// @Override
public void open(Map conf, TopologyContext context,
SpoutOutputCollector collector) {
this.conf = conf;
this.collector = collector;
this.context = context;
}
int num = 0;
//@Override
public void nextTuple() {
num++;
System.out.println("spout:"+num);
this.collector.emit(new Values(num));
Utils.sleep(1000);
}
//@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("num"));
}
}
public static class MyBolt extends BaseRichBolt{
private Map stormConf;
private TopologyContext context;
private OutputCollector collector;
// @Override
public void prepare(Map stormConf, TopologyContext context,
OutputCollector collector) {
this.stormConf = stormConf;
this.context = context;
this.collector = collector;
}
//@Override
public void execute(Tuple input) {
Integer num = input.getIntegerByField("num");
System.err.println("thread:"+Thread.currentThread().getId()+",num="+num);
}
//@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
}
}
public static void main(String[] args) {
TopologyBuilder topologyBuilder = new TopologyBuilder();
String spout_id = MySpout.class.getSimpleName();
String bolt_id = MyBolt.class.getSimpleName();
topologyBuilder.setSpout(spout_id, new MySpout());
topologyBuilder.setBolt(bolt_id, new MyBolt(),3).shuffleGrouping(spout_id);
Config config = new Config();
String topology_name = StormTopologyShufferGrouping.class.getSimpleName();
if(args.length==0){
//在本地运行
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology(topology_name, config, topologyBuilder.createTopology());
}else{
//在集群运行
try {
StormSubmitter.submitTopology(topology_name, config, topologyBuilder.createTopology());
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
} catch (AuthorizationException e) {
e.printStackTrace();
}
}
}
}
用fieldsGrouping方法

按奇偶数分组(也就是按字段分组)

从跑出来的结果看出来,一个线程处理奇数的一个线程处理偶数的

参考代码StormTopologyFieldsGrouping.java
package yehua.storm; import java.util.Map; import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils; /**
* FieldsGrouping
* 字段分组
* @author yehua
*
*/ public class StormTopologyFieldsGrouping { public static class MySpout extends BaseRichSpout{
private Map conf;
private TopologyContext context;
private SpoutOutputCollector collector;
//@Override
public void open(Map conf, TopologyContext context,
SpoutOutputCollector collector) {
this.conf = conf;
this.collector = collector;
this.context = context;
} int num = 0;
//@Override
public void nextTuple() {
num++;
System.out.println("spout:"+num);
this.collector.emit(new Values(num,num%2));
Utils.sleep(1000);
} //@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("num","flag"));
} } public static class MyBolt extends BaseRichBolt{ private Map stormConf;
private TopologyContext context;
private OutputCollector collector;
//@Override
public void prepare(Map stormConf, TopologyContext context,
OutputCollector collector) {
this.stormConf = stormConf;
this.context = context;
this.collector = collector;
} //@Override
public void execute(Tuple input) {
Integer num = input.getIntegerByField("num");
System.err.println("thread:"+Thread.currentThread().getId()+",num="+num);
} //@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) { } } public static void main(String[] args) {
TopologyBuilder topologyBuilder = new TopologyBuilder();
String spout_id = MySpout.class.getSimpleName();
String bolt_id = MyBolt.class.getSimpleName(); topologyBuilder.setSpout(spout_id, new MySpout());
//注意:字段分组一定可以保证相同分组的数据进入同一个线程处理
topologyBuilder.setBolt(bolt_id, new MyBolt(),2).fieldsGrouping(spout_id, new Fields("flag")); Config config = new Config();
String topology_name = StormTopologyFieldsGrouping.class.getSimpleName();
if(args.length==0){
//在本地运行
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology(topology_name, config, topologyBuilder.createTopology());
}else{
//在集群运行
try {
StormSubmitter.submitTopology(topology_name, config, topologyBuilder.createTopology());
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
} catch (AuthorizationException e) {
e.printStackTrace();
}
} } }
这里补充一下,比如说有两类数据3个线程的时候

我们再看看运行结果,发现只有两个线程干活了

还有一种情况,只有一个线程的情况,还是两类数据

从运行结果看出来,所有话一个进程干完了

allGrouping方法

运行结果:spout每发一条数据三个进程都接收到了(基本没什么应用场景)

参考代码StormTopologyAllGrouping.java
package yehua.storm; import java.util.Map; import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils; /**
* AllGrouping
* 广播分组
* @author yehua
*
*/ public class StormTopologyAllGrouping { public static class MySpout extends BaseRichSpout{
private Map conf;
private TopologyContext context;
private SpoutOutputCollector collector;
//@Override
public void open(Map conf, TopologyContext context,
SpoutOutputCollector collector) {
this.conf = conf;
this.collector = collector;
this.context = context;
} int num = 0;
//@Override
public void nextTuple() {
num++;
System.out.println("spout:"+num);
this.collector.emit(new Values(num));
Utils.sleep(1000);
} //@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("num"));
} } public static class MyBolt extends BaseRichBolt{ private Map stormConf;
private TopologyContext context;
private OutputCollector collector;
//@Override
public void prepare(Map stormConf, TopologyContext context,
OutputCollector collector) {
this.stormConf = stormConf;
this.context = context;
this.collector = collector;
} //@Override
public void execute(Tuple input) {
Integer num = input.getIntegerByField("num");
System.err.println("thread:"+Thread.currentThread().getId()+",num="+num);
} //@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) { } } public static void main(String[] args) {
TopologyBuilder topologyBuilder = new TopologyBuilder();
String spout_id = MySpout.class.getSimpleName();
String bolt_id = MyBolt.class.getSimpleName(); topologyBuilder.setSpout(spout_id, new MySpout());
topologyBuilder.setBolt(bolt_id, new MyBolt(),3).allGrouping(spout_id); Config config = new Config();
String topology_name = StormTopologyAllGrouping.class.getSimpleName();
if(args.length==0){
//在本地运行
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology(topology_name, config, topologyBuilder.createTopology());
}else{
//在集群运行
try {
StormSubmitter.submitTopology(topology_name, config, topologyBuilder.createTopology());
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
} catch (AuthorizationException e) {
e.printStackTrace();
}
} } }
LocalOrShufferGrouping方法

spout只会给同一个主机的线程发送数据(图中的线程1),也就是在同一个线程里会被发送数据,这样做的好处就是在同一个进程里发送数据效率搞,不用跨主机传输
但是当数据量太大的时候,线程1处理不了的时候就麻烦了,所以在实际工作中不建议这样做。
这里用的是3个线程(3个bolt),2个进程(2个worker)

从运行的结果我们可以看出来,只有一个线程在接收数据

还有一种情况,如果本地没有线程的时候,他就跟ShufferGrouping的效果一样的

参考代码StormTopologyLocalOrShufferGrouping.java
package yehua.storm; import java.util.Map; import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils; /**
* LocalAllshufferGrouping
* @author yehua
*
*/ public class StormTopologyLocalOrShufferGrouping { public static class MySpout extends BaseRichSpout{
private Map conf;
private TopologyContext context;
private SpoutOutputCollector collector;
//@Override
public void open(Map conf, TopologyContext context,
SpoutOutputCollector collector) {
this.conf = conf;
this.collector = collector;
this.context = context;
} int num = 0;
//@Override
public void nextTuple() {
num++;
System.out.println("spout:"+num);
this.collector.emit(new Values(num));
Utils.sleep(1000);
} //@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("num"));
} } public static class MyBolt extends BaseRichBolt{ private Map stormConf;
private TopologyContext context;
private OutputCollector collector;
//@Override
public void prepare(Map stormConf, TopologyContext context,
OutputCollector collector) {
this.stormConf = stormConf;
this.context = context;
this.collector = collector;
} //@Override
public void execute(Tuple input) {
Integer num = input.getIntegerByField("num");
System.err.println("thread:"+Thread.currentThread().getId()+",num="+num);
} //@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) { } } public static void main(String[] args) {
TopologyBuilder topologyBuilder = new TopologyBuilder();
String spout_id = MySpout.class.getSimpleName();
String bolt_id = MyBolt.class.getSimpleName(); topologyBuilder.setSpout(spout_id, new MySpout());
topologyBuilder.setBolt(bolt_id, new MyBolt(),3).localOrShuffleGrouping(spout_id); Config config = new Config();
config.setNumWorkers(2);
String topology_name = StormTopologyLocalOrShufferGrouping.class.getSimpleName();
if(args.length==0){
//在本地运行
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology(topology_name, config, topologyBuilder.createTopology());
}else{
//在集群运行
try {
StormSubmitter.submitTopology(topology_name, config, topologyBuilder.createTopology());
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
} catch (AuthorizationException e) {
e.printStackTrace();
}
} } }
storm的流分组的更多相关文章
- 简单聊聊Storm的流分组策略
简单聊聊Storm的流分组策略 首先我要强调的是,Storm的分组策略对结果有着直接的影响,不同的分组的结果一定是不一样的.其次,不同的分组策略对资源的利用也是有着非常大的不同,本文主要讲一讲loca ...
- Storm Grouping —— 流分组策略
Storm Grouping: Shuffle Grouping :随机分组,尽量均匀分布到下游Bolt中 将流分组定义为混排.这种混排分组意味着来自Spout的输入将混排,或随机分发给此Bolt中的 ...
- Storm流分组介绍
Storm流分组介绍 流分组是拓扑定义的一部分,每个Bolt指定应该接收哪个流作为输入.流分组定义了流/元组如何在Bolt的任务之间进行分发.在设计拓扑的时候需要定义数据 ...
- Storm Topology及分组原理
Storm的通信机制,需要满足如下一些条件以满足Storm的语义. 1.建立数据传输的缓冲区.在通信连接没有建立之前把发送的数据缓存起来.数据发送方可以在连接建立之前发送消息,而不需要等连接建立起来, ...
- Stream grouping-storm的流分组策略
A stream grouping tells a topology how to send tuples between two components. Remember, spouts and b ...
- Storm 学习之路(一)—— Storm和流处理简介
一.Storm 1.1 简介 Storm 是一个开源的分布式实时计算框架,可以以简单.可靠的方式进行大数据流的处理.通常用于实时分析,在线机器学习.持续计算.分布式RPC.ETL等场景.Storm具有 ...
- Storm 系列(一)—— Storm和流处理简介
一.Storm 1.1 简介 Storm 是一个开源的分布式实时计算框架,可以以简单.可靠的方式进行大数据流的处理.通常用于实时分析,在线机器学习.持续计算.分布式 RPC.ETL 等场景.Storm ...
- Storm实时流处理Hello World
近来在看Storm的相关资料,以下总结一下配置过程和Hello World例子. Storm是分布式的实时计算系统.详细文档可参考Storm网站,也可以参阅<Getting started wi ...
- java8 新特性 Stream流 分组 排序 过滤 多条件去重
private static List<User> list = new ArrayList<User>(); public static void main(String[] ...
随机推荐
- Oracle中varchar2类型字段长度限制使用问题
为纪念中华人民共和国建军90周年,特此一篇,以此纪念,我军威武!!! 一.问题背景 项目中商品发布,却没有保存成功. 二.问题定位 初步判断向数据库中保存时出现了错误,查看日志文件,由于日志文件过大就 ...
- Adaboost新理解
Adaboost有几个难点: 1.弱分类器的权重怎么理解? 误差大的弱分类器权重小,误差小的弱分类器权重大.这很好理解.在台湾大学林轩田老师的视频中,推导说,这个权值实际上貌似梯度下降,权值定义成1/ ...
- 设置ip地址、掩码、网关、DNS
@echo offcolor f8mode con cols=40 lines=8echo.echo.echo 设置IP为:echo.set /p ip= 192. ...
- centos yum出现no module named yum
运行yum出现如下错误 There was a problem importing one of the Python modules required to run yum. The error l ...
- 配置搭建与使用redis
redis单点.redis主从.redis哨兵 sentinel,redis集群cluster配置搭建与使用 redis是如今被互联网公司使用最广泛的一个中间件,我们打开GitHub搜索redis,边 ...
- Finally什么时候会被执行
PS:有return意味着程序结束,他一定会在程序结束前执行: PS: return返回前 会把数据存储到指定的位置,基本类型是不会改变的.引用类型是会影响修改的值的
- ORACLE expdp/impdp详解
ORCALE10G提供了新的导入导出工具,数据泵.Oracle官方对此的形容是:Oracle DataPump technology enables Very High-Speed movement ...
- commons-logging log4j logback 知识点
log4j 2,需要导入2个jar包: log4j-core-xx.jar log4j-api-xx.jar log4j 2 的 properties 配置文件名字为: log4j2.properti ...
- Ionic 3 自定义组件的使用
1. 创建组件 ionic g component myComponent myComponent为组件名称 创建好后,生成的文件如下图 2. 在Page 中使用 使用的是home 在home.htm ...
- Microsoft Dynamics CRM service 创建,更新等操作时,注意写抛出异常时,抛出SoapException异常
具体如下: using System.Web.Services.Protocols; try{ crmService.Update(procurementPlanEntity);//更新操作}catc ...