This post walks through storm-hdfs and SequenceFile usage, plus the Trident versions of both. Without further ado, here is the code:

pom.xml

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.3</version>
    <exclusions>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.7.3</version>
    <exclusions>
        <exclusion>
            <artifactId>asm</artifactId>
            <groupId>asm</groupId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-core</artifactId>
    <version>1.1.2</version>
</dependency>

My pom.xml pulls in quite a few dependencies; the ones above are just the relevant ones I copied out, so something may be missing. Every class below lists its own imports, though, so don't worry.
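
In particular, all of the org.apache.storm.hdfs.* classes used below come from the storm-hdfs artifact, which the snippet above does not show. If your build cannot resolve HdfsBolt or HdfsSpout, adding something like the following should fix it (matching the storm-core version is my assumption):

<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-hdfs</artifactId>
    <version>1.1.2</version>
</dependency>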

HDFS:

HdfsReadSpout: a custom spout that reads data from HDFS



import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

/**
 * @author cwc
 * @date 2018-05-25
 * @description: spout that reads data from HDFS
 * @version 1.0.0
 */
public class HdfsReadSpout extends BaseRichSpout {

    private static final long serialVersionUID = 1L;

    private SpoutOutputCollector collector;
    private String url = "/testData/mytest/app_hdfs-bolt-2-0-1528096963805.log";

    @Override
    public void nextTuple() {
        hdfsData(url);
    }

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("lines"));
    }

    private void hdfsData(String fileName) {
        System.out.println("Start reading data");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.18.130.100:8020");
        BufferedReader in = null;
        FSDataInputStream dis;
        String line;
        try {
            FileSystem hdfs = FileSystem.get(conf);
            dis = hdfs.open(new Path(fileName));
            in = new BufferedReader(new InputStreamReader(dis, "UTF-8"));
            while ((line = in.readLine()) != null) {
                // System.out.println("read line: " + line);
                this.collector.emit(new Values(line));
            }
        } catch (IllegalArgumentException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
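
One caveat about this spout: Storm calls nextTuple() in a loop, so as written the whole file gets re-read and re-emitted on every call. A minimal guard (my own sketch, not part of the original code) is a flag so the file is only read once:

private boolean finished = false;

@Override
public void nextTuple() {
    if (finished) {
        // nothing left to emit; sleep briefly so we don't spin the CPU
        org.apache.storm.utils.Utils.sleep(1000);
        return;
    }
    hdfsData(url);
    finished = true;
}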

Main storm-hdfs demo class:



import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.hdfs.bolt.HdfsBolt;
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.bolt.format.FileNameFormat;
import org.apache.storm.hdfs.bolt.format.RecordFormat;
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.TimedRotationPolicy.TimeUnit;
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;
import org.apache.storm.hdfs.spout.HdfsSpout;
import org.apache.storm.hdfs.spout.TextFileReader;
import org.apache.storm.topology.TopologyBuilder;
import com.sunsheen.jfids.bigdata.storm.demo.count.TestSpout;
/**
 * @ClassName: HDFSMain
 * @Description: demonstrates writing Storm data into HDFS and reading it back out
 * @author cwc
 * @date 2018-06-04 15:28:12
 * @version 2.0.0 (second revision)
 */
public class HDFSMain {

    public static void main(String[] args) {
        writeHdfs(args);
        // readHdfs(args);
        // selectHdfs(args);
    }

    /**
     * Writes Storm data into HDFS.
     * @param args command-line arguments
     */
    public static void writeHdfs(String[] args) {
        // Configure HDFS bolt
        RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter("\t"); // field delimiter for the output records
        SyncPolicy syncPolicy = new CountSyncPolicy(1000); // sync to HDFS after every 1000 tuples
        FileRotationPolicy rotationPolicy = new TimedRotationPolicy(1.0f, TimeUnit.MINUTES); // rotate files every minute
        FileNameFormat fileNameFormat = new DefaultFileNameFormat()
                .withPath("/testData/mytest/").withPrefix("app_").withExtension(".log"); // target directory, file prefix and extension
        HdfsBolt hdfsBolt = new HdfsBolt()
                .withFsUrl("hdfs://172.18.130.100:8020")
                .withFileNameFormat(fileNameFormat)
                .withRecordFormat(format)
                .withRotationPolicy(rotationPolicy)
                .withSyncPolicy(syncPolicy);
        // configure & build topology
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("kafka-write", new TestSpout(), 5);
        builder.setBolt("hdfs-write", hdfsBolt, 2).shuffleGrouping("kafka-write");
        // submit topology
        Config conf = new Config();
        String name = HDFSMain.class.getSimpleName();
        if (args != null && args.length > 0) {
            String nimbus = args[0];
            conf.put(Config.NIMBUS_HOST, nimbus);
            conf.setNumWorkers(3);
            try {
                StormSubmitter.submitTopologyWithProgressBar(name, conf, builder.createTopology());
            } catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
                e.printStackTrace();
            }
        } else {
            conf.setMaxTaskParallelism(3);
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology(name, conf, builder.createTopology());
            try {
                Thread.sleep(100000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            cluster.shutdown();
        }
    }

    /**
     * Reads data back out of HDFS with the custom HdfsReadSpout.
     * @param args command-line arguments
     */
    public static void readHdfs(String[] args) {
        // configure & build topology
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("hdfs-reader", new HdfsReadSpout(), 3);
        builder.setBolt("hdfs-read", new HDFSReadBolt(), 2).shuffleGrouping("hdfs-reader");
        // submit topology
        Config conf = new Config();
        String name = HDFSMain.class.getSimpleName();
        if (args != null && args.length > 0) {
            String nimbus = args[0];
            conf.put(Config.NIMBUS_HOST, nimbus);
            conf.setNumWorkers(3);
            try {
                StormSubmitter.submitTopologyWithProgressBar(name, conf, builder.createTopology());
            } catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
                e.printStackTrace();
            }
        } else {
            conf.setMaxTaskParallelism(3);
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology(name, conf, builder.createTopology());
            try {
                Thread.sleep(100000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            cluster.shutdown();
        }
    }

    /**
     * Uses the higher-level HdfsSpout shipped with storm-hdfs, which reads a whole HDFS
     * directory in batch instead of a single file.
     * @param args command-line arguments
     */
    public static void selectHdfs(String[] args) {
        // Instantiate spout to read text files
        HdfsSpout textReaderSpout = new HdfsSpout().setReaderType("text")
                .withOutputFields(TextFileReader.defaultFields)
                .setHdfsUri("hdfs://172.18.130.100:8020")     // HDFS URI
                .setSourceDir("/testData/mytest")             // directory to read
                .setArchiveDir("/testData/mytest/done")       // finished files are moved here
                .setBadFilesDir("/testData/mytest/badfiles"); // unreadable files are moved here
        // Create topology
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("hdfsspout", textReaderSpout, 2);
        builder.setBolt("hdfs-read", new HDFSReadBolt(), 2).shuffleGrouping("hdfsspout");
        // submit topology
        Config conf = new Config();
        String name = HDFSMain.class.getSimpleName();
        if (args != null && args.length > 0) {
            String nimbus = args[0];
            conf.put(Config.NIMBUS_HOST, nimbus);
            conf.setNumWorkers(3);
            try {
                StormSubmitter.submitTopologyWithProgressBar(name, conf, builder.createTopology());
            } catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
                e.printStackTrace();
            }
        } else {
            conf.setMaxTaskParallelism(3);
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology(name, conf, builder.createTopology());
            try {
                Thread.sleep(100000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            cluster.shutdown();
        }
    }
}
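
Note that readHdfs and selectHdfs refer to an HDFSReadBolt that is not shown in this post. If you want something to paste in, a minimal stand-in (my own sketch; I am assuming the bolt only needs to print each line and ack it) looks like this:

import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;

public class HDFSReadBolt extends BaseRichBolt {

    private static final long serialVersionUID = 1L;
    private OutputCollector collector;

    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple tuple) {
        // HdfsReadSpout declares a single field "lines"; HdfsSpout's TextFileReader also emits one text field per line
        System.out.println("read line: " + tuple.getString(0));
        collector.ack(tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // terminal bolt, nothing to emit
    }
}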

The Trident high-level API: before reading this part, take a look at how Trident differs from the plain core API and why you would want it. In short, instead of wiring spouts and bolts together with a TopologyBuilder, you build a Stream and persist it through a StateFactory (here HdfsStateFactory plus HdfsUpdater), which gives you micro-batched, exactly-once state updates. Keep thinking about the trade-offs.

package com.sunsheen.jfids.bigdata.storm.demo.hdfs.trident;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.hdfs.trident.HdfsState;
import org.apache.storm.hdfs.trident.HdfsStateFactory;
import org.apache.storm.hdfs.trident.HdfsUpdater;
import org.apache.storm.hdfs.trident.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.trident.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.trident.format.FileNameFormat;
import org.apache.storm.hdfs.trident.format.RecordFormat;
import org.apache.storm.hdfs.trident.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.trident.rotation.FileSizeRotationPolicy;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentState;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.state.StateFactory;
import org.apache.storm.tuple.Fields;

import com.sunsheen.jfids.bigdata.storm.demo.count.TestSpout;
import com.sunsheen.jfids.bigdata.storm.demo.hdfs.HDFSMain;

/**
 * @author cwc
 * @date 2018-06-04
 * @description: Trident interface for writing to HDFS
 * @version 1.0.0
 */
public class HdfsTridents {

    public static void main(String[] args) {
        Config conf = new Config();
        conf.setMaxSpoutPending(3);
        String name = HDFSMain.class.getSimpleName();
        if (args != null && args.length > 0) {
            // run on the cluster
            try {
                StormSubmitter.submitTopology(args[1], conf, buildTopology());
            } catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
                e.printStackTrace();
            }
        } else {
            // run locally
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("test", conf, buildTopology());
            try {
                Thread.sleep(100000);
                cluster.shutdown();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Builds the StormTopology.
     * @return the Trident topology as a StormTopology
     */
    public static StormTopology buildTopology() {
        // Note: if the written files come out empty, check the values emitted by the spout and the field delimiter.
        TestSpout spout = new TestSpout();
        TridentTopology topology = new TridentTopology();
        Stream stream = topology.newStream("stream", spout); // create the stream
        Fields hdfsFields = new Fields("line", "values");    // fields taken from the spout

        FileNameFormat fileNameFormat = new DefaultFileNameFormat()
                .withPrefix("trident")                  // file prefix
                .withExtension(".txt")                  // file extension
                .withPath("/testData/mytest/trident/"); // target directory

        RecordFormat recordFormat = new DelimitedRecordFormat()
                .withFieldDelimiter("\t") // field delimiter; the original used "/t" (a literal slash-t, not a tab), which is likely part of the problem above
                .withFields(hdfsFields);

        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB); // rotate once the file reaches this size

        HdfsState.Options options = new HdfsState.HdfsFileOptions()
                .withFileNameFormat(fileNameFormat)
                .withRecordFormat(recordFormat)
                .withRotationPolicy(rotationPolicy)
                .withFsUrl("hdfs://172.18.130.100:8020"); // namenode URL

        StateFactory factory = new HdfsStateFactory().withOptions(options); // create the state factory
        TridentState state = stream
                .partitionPersist(factory, hdfsFields, new HdfsUpdater(), new Fields()); // this is the key step that actually writes to HDFS
        return topology.build();
    }
}

Sequence module:

Spout class:



import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

/**
 * @author cwc
 * @date 2018-07-24
 * @version 1.0.0
 * @description: reads SequenceFile data
 */
public class SquenceSpout extends BaseRichSpout {

    private static final long serialVersionUID = 1L;

    private SpoutOutputCollector collector;
    private String fileUrl = "hdfs://172.18.130.100:8020/testData/mytest/trident/Topology-1-0-1532415869703.seq";

    /**
     * Constructor taking the file URL, so the path can be passed in by the caller.
     * @param fileUrl the sequence file to read
     */
    public SquenceSpout(String fileUrl) {
        this.fileUrl = fileUrl;
    }

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void nextTuple() {
        getSquence();
    }

    /**
     * Reads the SequenceFile and emits its records.
     */
    public void getSquence() {
        Configuration conf = new Configuration();
        Path path = new Path(this.fileUrl);
        SequenceFile.Reader.Option option1 = Reader.file(path);
        // SequenceFile.Reader.Option option2 = Reader.length(30); // read length; leave this out unless you know the length, otherwise it throws
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(conf, option1);
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            long position = reader.getPosition();
            while (reader.next(key, value)) {
                String syncSeen = reader.syncSeen() ? "*" : ""; // marks sync points; in my run this was always empty
                System.out.printf("[%s%s]\t%s\t%s\n", position, syncSeen, key, value);
                this.collector.emit(new Values(position, syncSeen, key, value)); // emit downstream (would a single collection be better?)
                position = reader.getPosition(); // position of the next record
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(reader);
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // four placeholder field names matching the four values emitted above: position, syncSeen, key, value
        declarer.declare(new Fields("he", "llo", "wo", "rd"));
    }
}
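
The write topology is shown further down, but SquenceSpout is never wired into a topology of its own. For a quick local smoke test, a wiring sketch like the following should do (my own code; SquenceReadTopology and the printing bolt are hypothetical names, not part of the original project):

import java.util.Map;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;

public class SquenceReadTopology {

    /** Trivial bolt that prints each record's key and value, then acks. */
    public static class SeqPrintBolt extends BaseRichBolt {
        private static final long serialVersionUID = 1L;
        private OutputCollector collector;

        @Override
        public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
        }

        @Override
        public void execute(Tuple tuple) {
            // fields "wo" and "rd" carry the SequenceFile key and value (see declareOutputFields in SquenceSpout)
            System.out.println(tuple.getValueByField("wo") + " -> " + tuple.getValueByField("rd"));
            collector.ack(tuple);
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // terminal bolt
        }
    }

    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("seq-spout", new SquenceSpout(
                "hdfs://172.18.130.100:8020/testData/mytest/trident/Topology-1-0-1532415869703.seq"), 1);
        builder.setBolt("seq-print", new SeqPrintBolt(), 1).shuffleGrouping("seq-spout");

        Config conf = new Config();
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("squence-read", conf, builder.createTopology());
        Thread.sleep(100000);
        cluster.shutdown();
    }
}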

Bolt class:



import java.io.IOException;
import java.net.URI;
import java.util.Map;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.storm.hdfs.bolt.AbstractHdfsBolt;
import org.apache.storm.hdfs.bolt.format.FileNameFormat;
import org.apache.storm.hdfs.bolt.format.SequenceFormat;
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;
import org.apache.storm.hdfs.common.AbstractHDFSWriter;
import org.apache.storm.hdfs.common.Partitioner;
import org.apache.storm.hdfs.common.SequenceFileWriter;
import org.apache.storm.hdfs.common.rotation.RotationAction;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * @author cwc
 * @date 2018-07-24
 * @version 1.0.0
 * @description: included here for readers whose storm-hdfs dependency does not provide this class
 */
public class SequenceFileBolt extends AbstractHdfsBolt {

    private static final long serialVersionUID = 1L;
    private static final Logger LOG = LoggerFactory.getLogger(SequenceFileBolt.class); // logging

    private SequenceFormat format;
    private SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.RECORD;
    private transient SequenceFile.Writer writer;
    private String compressionCodec = "default";
    private transient CompressionCodecFactory codecFactory;

    public SequenceFileBolt() {
    }

    public SequenceFileBolt withCompressionCodec(String codec) {
        this.compressionCodec = codec;
        return this;
    }

    public SequenceFileBolt withFsUrl(String fsUrl) {
        this.fsUrl = fsUrl;
        return this;
    }

    public SequenceFileBolt withConfigKey(String configKey) {
        this.configKey = configKey;
        return this;
    }

    public SequenceFileBolt withFileNameFormat(FileNameFormat fileNameFormat) {
        this.fileNameFormat = fileNameFormat;
        return this;
    }

    public SequenceFileBolt withSequenceFormat(SequenceFormat format) {
        this.format = format;
        return this;
    }

    public SequenceFileBolt withSyncPolicy(SyncPolicy syncPolicy) {
        this.syncPolicy = syncPolicy;
        return this;
    }

    public SequenceFileBolt withRotationPolicy(FileRotationPolicy rotationPolicy) {
        this.rotationPolicy = rotationPolicy;
        return this;
    }

    public SequenceFileBolt withCompressionType(SequenceFile.CompressionType compressionType) {
        this.compressionType = compressionType;
        return this;
    }

    public SequenceFileBolt withTickTupleIntervalSeconds(int interval) {
        this.tickTupleInterval = interval;
        return this;
    }

    public SequenceFileBolt addRotationAction(RotationAction action) {
        this.rotationActions.add(action);
        return this;
    }

    public SequenceFileBolt withRetryCount(int fileRetryCount) {
        this.fileRetryCount = fileRetryCount;
        return this;
    }

    public SequenceFileBolt withPartitioner(Partitioner partitioner) {
        this.partitioner = partitioner;
        return this;
    }

    public SequenceFileBolt withMaxOpenFiles(int maxOpenFiles) {
        this.maxOpenFiles = maxOpenFiles;
        return this;
    }

    @Override
    public void doPrepare(Map conf, TopologyContext topologyContext, OutputCollector collector) throws IOException {
        LOG.info("Preparing Sequence File Bolt...");
        if (this.format == null) {
            throw new IllegalStateException("SequenceFormat must be specified.");
        }
        this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
        this.codecFactory = new CompressionCodecFactory(hdfsConfig);
    }

    @Override
    protected String getWriterKey(Tuple tuple) {
        return "CONSTANT";
    }

    @Override
    protected AbstractHDFSWriter makeNewWriter(Path path, Tuple tuple) throws IOException {
        SequenceFile.Writer writer = SequenceFile.createWriter(
                this.hdfsConfig,
                SequenceFile.Writer.file(path),
                SequenceFile.Writer.keyClass(this.format.keyClass()),
                SequenceFile.Writer.valueClass(this.format.valueClass()),
                SequenceFile.Writer.compression(this.compressionType, this.codecFactory.getCodecByName(this.compressionCodec))
        );
        return new SequenceFileWriter(this.rotationPolicy, path, writer, this.format);
    }
}

SequenceFileTopology:



import java.io.FileInputStream;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.io.SequenceFile;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.bolt.format.DefaultSequenceFormat;
import org.apache.storm.hdfs.bolt.format.FileNameFormat;
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy.Units;
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;
import org.apache.storm.hdfs.common.rotation.MoveFileAction;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.yaml.snakeyaml.Yaml;
/**
 * @author cwc
 * @date 2018-07-24
 * @version 1.0.0
 * @description: writes data into SequenceFiles
 */
public class SequenceFileTopology {

    static final String SENTENCE_SPOUT_ID = "sentence-spout";
    static final String BOLT_ID = "Topology"; // becomes the prefix of the generated files
    static final String TOPOLOGY_NAME = "test-topology";

    public static void main(String[] args) throws Exception {
        Config config = new Config();
        config.setNumWorkers(1);

        SentenceSpout spout = new SentenceSpout();

        // sync the filesystem after every 1000 tuples
        SyncPolicy syncPolicy = new CountSyncPolicy(1000);
        // rotate files once they reach 5 MB
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, Units.MB);

        FileNameFormat fileNameFormat = new DefaultFileNameFormat()
                .withPath("/testData/mytest/trident/") // target directory
                .withExtension(".seq");                // file extension

        // create sequence format instance: key/value fields taken from the spout's tuples
        DefaultSequenceFormat format = new DefaultSequenceFormat("timestamp", "sentence");

        // The Yaml block below was meant to load extra HDFS settings into config under "hdfs.config".
        // Note: FileInputStream expects a local YAML file path here, not an HDFS URL; an empty map is used instead.
        // Yaml yaml = new Yaml();
        // InputStream in = new FileInputStream("hdfs://172.18.130.100:8020");
        // Map<String, Object> yamlConf = (Map<String, Object>) yaml.load(in);
        // in.close();
        Map<String, Object> yamlConf = new HashMap<String, Object>();
        config.put("hdfs.config", yamlConf); // custom parameters picked up via withConfigKey

        SequenceFileBolt bolt = new SequenceFileBolt()
                .withFsUrl("hdfs://172.18.130.100:8020") // HDFS namenode
                .withConfigKey("hdfs.config")
                .withFileNameFormat(fileNameFormat)
                .withSequenceFormat(format)
                .withRotationPolicy(rotationPolicy)
                .withSyncPolicy(syncPolicy)
                .withCompressionType(SequenceFile.CompressionType.RECORD)
                .withCompressionCodec("deflate") // compression codec name
                // MoveFileAction moves each finished (rotated) file into the given directory
                .addRotationAction(new MoveFileAction().toDestination("/testData/mytest/seqOne/"));

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout(SENTENCE_SPOUT_ID, spout, 1);
        // SentenceSpout --> SequenceFileBolt
        builder.setBolt(BOLT_ID, bolt, 4).shuffleGrouping(SENTENCE_SPOUT_ID);

        String topoName = TOPOLOGY_NAME;
        // cluster mode
        // if (args.length == 3) {
        //     topoName = args[2];
        // } else if (args.length > 3) {
        //     System.out.println("Usage: SequenceFileTopology [hdfs url] [hdfs yaml config file] <topology name>");
        //     return;
        // }
        // StormSubmitter.submitTopology(topoName, config, builder.createTopology());

        // local test run
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology(topoName, config, builder.createTopology());
        try {
            Thread.sleep(100000);
            cluster.shutdown();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }

    public static void waitForSeconds(int seconds) {
        try {
            Thread.sleep(seconds * 1000);
        } catch (InterruptedException e) {
        }
    }

    public static class SentenceSpout extends BaseRichSpout {

        private static final long serialVersionUID = 1L;

        private ConcurrentHashMap<UUID, Values> pending;
        private SpoutOutputCollector collector;
        private String[] sentences = {
                "my dog has fleas",
                "i like cold beverages",
                "the dog ate my homework",
                "don't have a cow man",
                "i don't think i like fleas"
        };
        private int index = 0;
        private int count = 0;
        private long total = 0L;

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("sentence", "timestamp"));
        }

        public void open(Map config, TopologyContext context, SpoutOutputCollector collector) {
            this.collector = collector;
            this.pending = new ConcurrentHashMap<UUID, Values>();
        }

        public void nextTuple() {
            Values values = new Values(sentences[index], System.currentTimeMillis());
            UUID msgId = UUID.randomUUID();
            this.pending.put(msgId, values);
            this.collector.emit(values, msgId);
            index++;
            if (index >= sentences.length) {
                index = 0;
            }
            count++;
            total++;
            if (count > 20000) {
                count = 0;
                System.out.println("Pending count: " + this.pending.size() + ", total: " + this.total);
            }
            Thread.yield();
        }

        public void ack(Object msgId) {
            // System.out.println("ACK");
            this.pending.remove(msgId);
        }

        public void fail(Object msgId) {
            System.out.println("**** RESENDING FAILED TUPLE");
            this.collector.emit(this.pending.get(msgId), msgId);
        }
    }

    public static class MyBolt extends BaseRichBolt {

        private static final long serialVersionUID = 1L;

        // private HashMap<String, Long> counts = null;
        private HashMap<String, String> counts = null;
        private OutputCollector collector;

        public void prepare(Map config, TopologyContext context, OutputCollector collector) {
            // this.counts = new HashMap<String, Long>();
            this.counts = new HashMap<String, String>();
            this.collector = collector;
        }

        public void execute(Tuple tuple) {
            collector.ack(tuple);
        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // this bolt does not emit anything
        }

        @Override
        public void cleanup() {
        }
    }
}

sequenceTrident:



import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.hdfs.common.rotation.MoveFileAction;
import org.apache.storm.hdfs.trident.HdfsState;
import org.apache.storm.hdfs.trident.HdfsStateFactory;
import org.apache.storm.hdfs.trident.HdfsUpdater;
import org.apache.storm.hdfs.trident.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.trident.format.DefaultSequenceFormat;
import org.apache.storm.hdfs.trident.format.FileNameFormat;
import org.apache.storm.hdfs.trident.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.trident.rotation.FileSizeRotationPolicy;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentState;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.state.StateFactory;
import org.apache.storm.tuple.Fields;
import com.sunsheen.jfids.bigdata.storm.demo.hdfs.SequenceFileTopology.SentenceSpout;

/**
 * @author cwc
 * @date 2018-06-05
 * @description: writes SequenceFile data through the Trident high-level API
 * @version 1.0.0
 */
public class TridentSequenceTopology {

    /**
     * Compared with a plain topology, the Trident version differs in its return value and in the use of a state factory.
     * @param hdfsUrl the HDFS namenode URL
     * @return the built StormTopology
     */
    public static StormTopology buildTopology(String hdfsUrl) {
        SentenceSpout spout = new SentenceSpout();
        TridentTopology topology = new TridentTopology();
        Stream stream = topology.newStream("spout1", spout);

        Fields hdfsFields = new Fields("sentence", "timestamp");

        FileNameFormat fileNameFormat = new DefaultFileNameFormat()
                .withPath("/testData/mytest/trident/")
                .withPrefix("trident")
                .withExtension(".seq");

        // rotate files once they reach 5 MB
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);

        HdfsState.Options seqOpts = new HdfsState.SequenceFileOptions()
                .withFileNameFormat(fileNameFormat)
                .withSequenceFormat(new DefaultSequenceFormat("timestamp", "sentence"))
                .withRotationPolicy(rotationPolicy)
                .withFsUrl(hdfsUrl)
                // .withConfigKey("hdfs.config")
                .addRotationAction(new MoveFileAction().toDestination("/testData/mytest/trident111/"));

        StateFactory factory = new HdfsStateFactory().withOptions(seqOpts);
        TridentState state = stream
                .partitionPersist(factory, hdfsFields, new HdfsUpdater(), new Fields());
        return topology.build();
    }

    public static void main(String[] args) {
        String hdfsUrl = "hdfs://172.18.130.100:8020";
        Config conf = new Config();
        conf.setMaxSpoutPending(5);
        if (args != null && args.length > 0) {
            // run on the cluster
            try {
                StormSubmitter.submitTopology(args[1], conf, buildTopology(hdfsUrl));
            } catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
                e.printStackTrace();
            }
        } else {
            // run locally
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("test", conf, buildTopology(hdfsUrl));
            try {
                Thread.sleep(100000);
                cluster.shutdown();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}

If anything is unclear, go back over the code and think it through. Good luck to you all.
