Integrating Trident with MongoDB
MongoDB is a NoSQL database widely used in big data systems. Its rich set of query and aggregation operators makes it a natural fit for storing logs in query-heavy log systems. Storm's high-level API, Trident, ships with MongoDB integration, but the official module only covers inserts; it exposes no interface for the equally common update operation. This article shows one way to perform MongoDB updates from Trident, and also extends the persistence step so that the persisted result is emitted downstream for further processing. The full code follows, one class per block: a request-method enum, a format filter, a log-parsing function, a result-printing function, a state updater for the built-in insert state, a custom Mongo state with its factory and updater, the topology, and a small factory utility.
MethodTypeEnum.java:

import java.util.Objects;

/**
 * <p>
 * Date-Time: 2018/09/05 15:14
 * Company: 百趣
 * </p>
 * Request method enum
 *
 * @author fangyuanjie
 * @version 1.0.0
 */
public enum MethodTypeEnum {

    // GET request
    GET("GET", "GET request"),
    // POST request
    POST("POST", "POST request");

    private String code;
    private String desc;

    MethodTypeEnum(String code, String desc) {
        this.code = code;
        this.desc = desc;
    }

    public String getCode() {
        return code;
    }

    public void setCode(String code) {
        this.code = code;
    }

    public String getDesc() {
        return desc;
    }

    public void setDesc(String desc) {
        this.desc = desc;
    }

    public static MethodTypeEnum getByCode(String code) {
        for (MethodTypeEnum methodTypeEnum : values()) {
            if (Objects.equals(methodTypeEnum.getCode(), code)) {
                return methodTypeEnum;
            }
        }
        return null;
    }
}
FormatFilter.java:

import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.trident.operation.BaseFilter;
import org.apache.storm.trident.tuple.TridentTuple;

/**
 * <p>
 * Copyright: Copyright (c) 2018/9/10 14:28
 * <p>
 * Company: 百趣
 * <p>
 * Format filter
 * @author tangzhe
 * @version 1.0.0
 */
public class FormatFilter extends BaseFilter {

    @Override
    public boolean isKeep(TridentTuple tuple) {
        String message = tuple.getStringByField("str");
        System.out.println(this.getClass().getSimpleName() + "->message:" + message);
        if (StringUtils.isBlank(message)) {
            System.out.println(this.getClass().getSimpleName() + ": message must not be empty!");
            return false;
        }
        JSONObject jsonObject;
        try {
            jsonObject = JSONObject.parseObject(message);
        } catch (Exception e) {
            System.out.println(this.getClass().getSimpleName() + ": malformed message!");
            return false;
        }
        if (jsonObject.getLong("reqTime") == null ||
                jsonObject.getJSONObject("headers") == null ||
                jsonObject.getString("reqURI") == null) {
            System.out.println(this.getClass().getSimpleName() + ": request fields must not be empty!");
            return false;
        }
        try {
            jsonObject.getJSONObject("headers");
            jsonObject.getJSONObject("uriArgs");
            jsonObject.getJSONObject("bodyData");
        } catch (JSONException e) {
            System.out.println(this.getClass().getSimpleName() + ": malformed request fields!");
            return false;
        }
        return true;
    }
}
OperateLogParseFunction.java:

import com.alibaba.fastjson.JSONObject;
import net.baiqu.storm.trident.enums.MethodTypeEnum;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values;

import java.util.Date;

/**
 * <p>
 * Copyright: Copyright (c) 2018/9/10 14:34
 * <p>
 * Company: 百趣
 * <p>
 * Log parsing function
 * @author tangzhe
 * @version 1.0.0
 */
public class OperateLogParseFunction extends BaseFunction {

    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        String message = tuple.getStringByField("str");
        JSONObject jsonObject = JSONObject.parseObject(message);
        System.out.println(this.getClass().getSimpleName() + "->message: " + message);
        JSONObject headers = jsonObject.getJSONObject("headers");
        JSONObject uriArgs = null;
        String method = jsonObject.getString("method");
        if (MethodTypeEnum.GET.getCode().equals(method)) {
            // GET requests carry their parameters in the URI
            uriArgs = jsonObject.getJSONObject("uriArgs");
        } else if (MethodTypeEnum.POST.getCode().equals(method)) {
            // POST requests carry their parameters in the body
            uriArgs = jsonObject.getJSONObject("bodyData");
        }
        uriArgs = uriArgs != null ? uriArgs : new JSONObject();
        String appId = jsonObject.getString("appId");
        String userId = uriArgs.getString("userId");
        String ip = jsonObject.getString("ip");
        String host = headers.getString("host");
        String requestURI = jsonObject.getString("reqURI");
        String username = uriArgs.getString("username");
        String role = uriArgs.getString("role");
        String memo = uriArgs.getString("memo");
        // reqTime is in seconds; convert to milliseconds for java.util.Date
        Date requestTime = new Date(jsonObject.getLong("reqTime") * 1000);
        collector.emit(new Values(appId, host, requestURI, method, ip, requestTime,
                userId, username, role, memo, new Date()));
    }
}
OperatePrintFunction.java:

import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.tuple.TridentTuple;

/**
 * <p>
 * Copyright: Copyright (c) 2018/9/10 16:33
 * <p>
 * Company: 百趣
 * <p>
 * Result logging function
 * @author tangzhe
 * @version 1.0.0
 */
public class OperatePrintFunction extends BaseFunction {

    @Override
    public void execute(TridentTuple input, TridentCollector collector) {
        String result = input.getStringByField("result");
        if ("success".equalsIgnoreCase(result)) {
            System.out.println(this.getClass().getSimpleName() + "->: insert into mongo succeeded");
        } else {
            System.out.println(this.getClass().getSimpleName() + "->: insert into mongo failed");
        }
    }
}
MyMongoStateUpdater.java:

import org.apache.storm.mongodb.trident.state.MongoState;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.BaseStateUpdater;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values;

import java.util.List;

/**
 * <p>
 * Copyright: Copyright (c) 2018/9/10 16:29
 * <p>
 * Company: 百趣
 * <p>
 *
 * @author tangzhe
 * @version 1.0.0
 */
public class MyMongoStateUpdater extends BaseStateUpdater<MongoState> {

    @Override
    public void updateState(MongoState state, List<TridentTuple> tuples,
                            TridentCollector collector) {
        try {
            // Delegate to the built-in MongoState (insert) and emit the outcome downstream
            state.updateState(tuples, collector);
            collector.emit(new Values("success"));
        } catch (Exception e) {
            e.printStackTrace();
            collector.emit(new Values("fail"));
        }
    }
}
OperateMongoState.java:

import com.google.common.collect.Lists;
import com.mongodb.client.model.Filters;
import org.apache.commons.lang.Validate;
import org.apache.storm.mongodb.common.MongoDBClient;
import org.apache.storm.mongodb.common.mapper.MongoMapper;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.State;
import org.apache.storm.trident.tuple.TridentTuple;
import org.bson.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * Date-Time: 2018/09/10 13:50
 * Company: 百趣
 * </p>
 *
 * @author tangzhe
 * @version 1.0.0
 */
public class OperateMongoState implements State {

    private static final Logger LOG = LoggerFactory.getLogger(OperateMongoState.class);

    private OperateMongoState.Options options;
    private MongoDBClient mongoClient;
    private Map map;

    protected OperateMongoState(Map map, OperateMongoState.Options options) {
        this.options = options;
        this.map = map;
    }

    public static class Options implements Serializable {
        private String url;
        private String collectionName;
        private MongoMapper mapper;

        public OperateMongoState.Options withUrl(String url) {
            this.url = url;
            return this;
        }

        public OperateMongoState.Options withCollectionName(String collectionName) {
            this.collectionName = collectionName;
            return this;
        }

        public OperateMongoState.Options withMapper(MongoMapper mapper) {
            this.mapper = mapper;
            return this;
        }
    }

    protected void prepare() {
        Validate.notEmpty(options.url, "url can not be blank or null");
        Validate.notEmpty(options.collectionName, "collectionName can not be blank or null");
        Validate.notNull(options.mapper, "MongoMapper can not be null");
        this.mongoClient = new MongoDBClient(options.url, options.collectionName);
    }

    @Override
    public void beginCommit(Long txid) {
        LOG.debug("beginCommit is noop.");
    }

    @Override
    public void commit(Long txid) {
        LOG.debug("commit is noop.");
    }

    public void updateState(List<TridentTuple> tuples, TridentCollector collector) {
        List<Document> documents = Lists.newArrayList();
        for (TridentTuple tuple : tuples) {
            Document document = options.mapper.toDocument(tuple);
            documents.add(document);
        }
        // Upsert keyed on "logDate": $set the mapped document, inserting when no match exists.
        // logDate is emitted as a java.util.Date, so read it as a raw value rather than a String.
        this.mongoClient.update(
                Filters.eq("logDate",
                        tuples.get(0).getValueByField("logDate")),
                new Document("$set", documents.get(0)), true);
    }
}
OperateMongoStateFactory.java:

import org.apache.storm.task.IMetricsContext;
import org.apache.storm.trident.state.State;
import org.apache.storm.trident.state.StateFactory;

import java.util.Map;

/**
 * <p>
 * Date-Time: 2018/09/10 13:50
 * Company: 百趣
 * </p>
 *
 * @author tangzhe
 * @version 1.0.0
 */
public class OperateMongoStateFactory implements StateFactory {

    private OperateMongoState.Options options;

    public OperateMongoStateFactory(OperateMongoState.Options options) {
        this.options = options;
    }

    @Override
    public State makeState(Map conf, IMetricsContext metrics,
                           int partitionIndex, int numPartitions) {
        OperateMongoState state = new OperateMongoState(conf, options);
        state.prepare();
        return state;
    }
}
OperateMongoStateUpdater.java:

package net.baiqu.storm.trident.state;

import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.BaseStateUpdater;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values;

import java.util.List;

/**
 * <p>
 * Date-Time: 2018/09/10 13:50
 * Company: 百趣
 * </p>
 *
 * @author tangzhe
 * @version 1.0.0
 */
public class OperateMongoStateUpdater extends BaseStateUpdater<OperateMongoState> {

    @Override
    public void updateState(OperateMongoState state, List<TridentTuple> tuples, TridentCollector collector) {
        state.updateState(tuples, collector);
        String userId = tuples.get(0).getStringByField("userId");
        collector.emit(new Values(userId));
    }
}
OperateLogTridentTopology.java:

package net.baiqu.storm.trident.topology;

import kafka.api.OffsetRequest;
import net.baiqu.storm.trident.filter.FormatFilter;
import net.baiqu.storm.trident.function.OperateLogParseFunction;
import net.baiqu.storm.trident.function.OperatePrintFunction;
import net.baiqu.storm.trident.state.MyMongoStateUpdater;
import net.baiqu.storm.trident.util.TridentMongoFactory;
import net.baiqu.storm.utils.Constants;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.kafka.trident.TransactionalTridentKafkaSpout;
import org.apache.storm.kafka.trident.TridentKafkaConfig;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.tuple.Fields;

/**
 * <p>
 * Date-Time: 2018/09/10 13:50
 * Company: 百趣
 * </p>
 *
 * @author tangzhe
 * @version 1.0.0
 */
public class OperateLogTridentTopology {

    public static void main(String[] args) {
        TridentTopology topology = new TridentTopology();

        BrokerHosts hosts = new ZkHosts(Constants.ZK_HOSTS);
        String topic = Constants.KAFKA_LOG_TOPIC;
        String zkRoot = Constants.ZK_KAFKA_ROOT;
        String id = Constants.KAFKA_SPOUT_ID;

        TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(hosts, topic, id);
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        // In demo mode, start reading from the latest Kafka offset
        if (StringUtils.equalsIgnoreCase("demo", Constants.MODE)) {
            kafkaConfig.startOffsetTime = OffsetRequest.LatestTime();
        }
        TransactionalTridentKafkaSpout kafkaSpout = new TransactionalTridentKafkaSpout(kafkaConfig);

        Stream stream = topology.newStream("kafkaSpout", kafkaSpout).parallelismHint(1);
        stream.shuffle().each(new Fields("str"), new FormatFilter())
                .parallelismHint(1)
                .shuffle().each(new Fields("str"), new OperateLogParseFunction(),
                        new Fields("appId", "host", "requestURI", "method", "ip",
                                "requestTime", "userId", "username", "role", "memo", "logDate"))
                .parallelismHint(1)
                .partitionPersist(TridentMongoFactory.getMongoInsertState(),
                        new Fields("appId", "host", "requestURI", "method", "ip",
                                "requestTime", "userId", "username", "role", "memo", "logDate"),
                        new MyMongoStateUpdater(),
                        new Fields("result"))
                .parallelismHint(1)
                .newValuesStream().shuffle().each(
                        new Fields("result"), new OperatePrintFunction(), new Fields("none"))
                .parallelismHint(1);

        Config config = new Config();
        if (StringUtils.equalsIgnoreCase("demo", Constants.MODE)) {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("operateLogTridentTopology", config, topology.build());
        } else {
            config.setNumWorkers(1);
            config.put(Config.NIMBUS_HOST, Constants.NIMBUS_HOST);
            config.put(Config.NIMBUS_THRIFT_PORT, Constants.NIMBUS_THRIFT_PORT);
            config.put(Config.TOPOLOGY_ACKER_EXECUTORS, 1);
            try {
                StormSubmitter.submitTopology(args[0], config, topology.build());
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
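The topology above persists through the built-in insert state (getMongoInsertState with MyMongoStateUpdater). As a sketch of how the custom update path could be wired in instead, here is a small helper that applies partitionPersist with the getMongoUpdateState factory method defined in TridentMongoFactory below, using the same field names; the parsedStream argument is assumed to be the stream produced by OperateLogParseFunction:

import net.baiqu.storm.trident.state.OperateMongoStateUpdater;
import net.baiqu.storm.trident.util.TridentMongoFactory;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentState;
import org.apache.storm.tuple.Fields;

/**
 * Sketch: wiring the custom update state in place of the insert-based
 * partitionPersist used in OperateLogTridentTopology.
 */
public class UpdatePersistExample {

    public static TridentState persistWithUpdate(Stream parsedStream) {
        return parsedStream.partitionPersist(
                TridentMongoFactory.getMongoUpdateState(),
                new Fields("appId", "host", "requestURI", "method", "ip",
                        "requestTime", "userId", "username", "role", "memo", "logDate"),
                // OperateMongoStateUpdater performs the upsert and re-emits the userId
                new OperateMongoStateUpdater(),
                new Fields("userId"));
    }
}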
TridentMongoFactory.java:

package net.baiqu.storm.trident.util;

import net.baiqu.storm.trident.state.OperateMongoState;
import net.baiqu.storm.trident.state.OperateMongoStateFactory;
import net.baiqu.storm.utils.Constants;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.mongodb.common.mapper.MongoMapper;
import org.apache.storm.mongodb.common.mapper.SimpleMongoMapper;
import org.apache.storm.mongodb.trident.state.MongoState;
import org.apache.storm.mongodb.trident.state.MongoStateFactory;
import org.apache.storm.trident.state.StateFactory;

/**
 * <p>
 * Copyright: Copyright (c) 2018/9/10 14:56
 * <p>
 * Company: 百趣
 * <p>
 * Trident Mongo factory class
 * @author tangzhe
 * @version 1.0.0
 */
public class TridentMongoFactory {

    public static final String URL = "mongodb://" + Constants.MONGODB_USERNAME + ":"
            + Constants.MONGODB_PASSWORD.replace("@", "%40")
            + "@" + Constants.MONGODB_HOSTS + ":" + Constants.MONGODB_PORT + "/"
            + Constants.MONGODB_DATABASE + "?connectTimeoutMS=" + Constants.MONGODB_TIMEOUT;

    public static final String URL2 = "mongodb://" + Constants.MONGODB_HOSTS + ":" + Constants.MONGODB_PORT + "/"
            + Constants.MONGODB_DATABASE + "?connectTimeoutMS=" + Constants.MONGODB_TIMEOUT;

    public static final String OPERATE_LOG_DB = "operate_log";

    /**
     * Insert into Mongo using the built-in state
     */
    public static StateFactory getMongoInsertState() {
        String url = getUrl();
        MongoMapper mapper = new SimpleMongoMapper()
                .withFields("appId", "host", "requestURI", "method", "ip", "requestTime",
                        "userId", "username", "role", "memo", "logDate");
        MongoState.Options options = new MongoState.Options()
                .withUrl(url)
                .withCollectionName(OPERATE_LOG_DB)
                .withMapper(mapper);
        return new MongoStateFactory(options);
    }

    /**
     * Update Mongo using the custom state
     */
    public static StateFactory getMongoUpdateState() {
        String url = getUrl();
        MongoMapper mapper = new SimpleMongoMapper()
                .withFields("appId", "host", "requestURI", "method", "ip", "requestTime",
                        "userId", "username", "role", "memo", "logDate");
        OperateMongoState.Options options = new OperateMongoState.Options()
                .withUrl(url)
                .withCollectionName(OPERATE_LOG_DB)
                .withMapper(mapper);
        return new OperateMongoStateFactory(options);
    }

    /**
     * Build the Mongo connection URL; include credentials only when a username is configured
     */
    private static String getUrl() {
        String url;
        if (StringUtils.isNotBlank(Constants.MONGODB_USERNAME)) {
            url = URL;
        } else {
            url = URL2;
        }
        return url;
    }
}