MongoDB是大数据技术中常用的NoSQL数据库,它提供了大量的查询、聚合等操作函数,对于查询量很大的日志系统来说,MongoDB是大数据日志存储的福音。Storm的高级编程技术Trident也提供了与MongoDB集成的方法,但官方只提供了新增(插入)操作的处理,对于常用的修改操作并未提供接口。本文提供了一种使用Trident进行MongoDB修改操作的方式,并且对持久化的数据提供了输出的拓展操作,具体代码见下方:

import java.util.Objects;

/**
 * Request type enumeration.
 *
 * <p>
 * Date-Time: 2018/09/05 15:14
 * Company: Baiqu
 * </p>
 *
 * @author fangyuanjie
 * @version 1.0.0
 */
public enum MethodTypeEnum {

    /** GET request. */
    GET("GET", "GET请求"),

    /** POST request. */
    POST("POST", "POST请求");

    /** Code string that {@link #getByCode(String)} matches against. */
    private String code;

    /** Description text attached to the request type. */
    private String desc;

    MethodTypeEnum(String code, String desc) {
        this.code = code;
        this.desc = desc;
    }

    /**
     * Looks up the constant whose code equals the given value.
     *
     * @param code code to search for; may be {@code null}
     * @return the first constant (in declaration order) with an equal code,
     *         or {@code null} when none matches
     */
    public static MethodTypeEnum getByCode(String code) {
        final MethodTypeEnum[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            if (Objects.equals(candidates[i].code, code)) {
                return candidates[i];
            }
        }
        return null;
    }

    /** @return the code of this request type */
    public String getCode() {
        return code;
    }

    /** @param code new code for this constant */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the description of this request type */
    public String getDesc() {
        return desc;
    }

    /** @param desc new description for this constant */
    public void setDesc(String desc) {
        this.desc = desc;
    }
}
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.trident.operation.BaseFilter;
import org.apache.storm.trident.tuple.TridentTuple; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 14:28
* <p>
* Company: 百趣
* <p>
* 格式过滤
* @author tangzhe
* @version 1.0.0
*/
public class FormatFilter extends BaseFilter {

    /**
     * Decides whether a raw log message is kept for further processing.
     *
     * <p>The message is read from the tuple field {@code "str"}. It is dropped when it is
     * blank, when it cannot be parsed into a JSON object, when any of {@code reqTime},
     * {@code headers} or {@code reqURI} is missing, or when {@code reqTime},
     * {@code headers}, {@code uriArgs} or {@code bodyData} cannot be read as the expected
     * type. Every rejection prints a reason to standard output.
     *
     * @param tuple tuple carrying the raw message in field {@code "str"}
     * @return {@code true} when the message passes all checks, {@code false} otherwise
     */
    @Override
    public boolean isKeep(TridentTuple tuple) {
        String message = tuple.getStringByField("str");
        System.out.println(this.getClass().getSimpleName() + "->message:" + message);
        if (StringUtils.isBlank(message)) {
            return reject(": 消息不能为空!");
        }

        JSONObject jsonObject;
        try {
            jsonObject = JSONObject.parseObject(message);
        } catch (Exception e) {
            return reject(": 消息格式有误!");
        }
        // parseObject can hand back null without throwing (e.g. for the JSON literal
        // "null"); treat that as a malformed message instead of failing with an NPE below.
        if (jsonObject == null) {
            return reject(": 消息格式有误!");
        }

        // The typed getters throw unchecked exceptions (not only JSONException) when a
        // field holds a value of the wrong type, so the presence checks run inside the
        // same try as the type checks and any RuntimeException rejects the tuple.
        try {
            if (jsonObject.getLong("reqTime") == null
                    || jsonObject.getJSONObject("headers") == null
                    || jsonObject.getString("reqURI") == null) {
                return reject(": 请求信息不能为空!");
            }
            jsonObject.getJSONObject("uriArgs");
            jsonObject.getJSONObject("bodyData");
        } catch (RuntimeException e) {
            return reject(": 请求信息格式有误!");
        }
        return true;
    }

    /** Prints the rejection reason prefixed with this class's simple name and returns {@code false}. */
    private boolean reject(String reason) {
        System.out.println(this.getClass().getSimpleName() + reason);
        return false;
    }
}
import com.alibaba.fastjson.JSONObject;
import net.baiqu.storm.trident.enums.MethodTypeEnum;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values; import java.util.Date; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 14:34
* <p>
* Company: 百趣
* <p>
* 日志解析函数
* @author tangzhe
* @version 1.0.0
*/
public class OperateLogParseFunction extends BaseFunction {

    /**
     * Parses the JSON message held in tuple field {@code "str"} and emits one tuple with,
     * in order: appId, host, requestURI, method, ip, requestTime, userId, username, role,
     * memo and the current time.
     *
     * <p>userId, username, role and memo are read from {@code uriArgs} for GET requests
     * and from {@code bodyData} for POST requests; for any other method they are
     * {@code null}. {@code reqTime} is multiplied by 1000 before being turned into a
     * {@link Date} (presumably epoch seconds to milliseconds; confirm against the producer).
     *
     * @param tuple     input tuple carrying the raw message
     * @param collector collector that receives the parsed values
     */
    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        final String raw = tuple.getStringByField("str");
        final JSONObject log = JSONObject.parseObject(raw);
        System.out.println(this.getClass().getSimpleName() + "->message: " + raw);

        final JSONObject headers = log.getJSONObject("headers");
        final String method = log.getString("method");
        final JSONObject args = selectArgs(log, method);

        final Values parsed = new Values(
                log.getString("appId"),
                headers.getString("host"),
                log.getString("reqURI"),
                method,
                log.getString("ip"),
                new Date(log.getLong("reqTime") * 1000),
                args.getString("userId"),
                args.getString("username"),
                args.getString("role"),
                args.getString("memo"),
                new Date());
        collector.emit(parsed);
    }

    /**
     * Picks the JSON object that carries the request arguments for the given method;
     * never returns {@code null} (an empty object stands in when nothing applies).
     */
    private static JSONObject selectArgs(JSONObject log, String method) {
        JSONObject picked = null;
        if (MethodTypeEnum.GET.getCode().equals(method)) {
            picked = log.getJSONObject("uriArgs");
        } else if (MethodTypeEnum.POST.getCode().equals(method)) {
            picked = log.getJSONObject("bodyData");
        }
        if (picked == null) {
            return new JSONObject();
        }
        return picked;
    }
}
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.tuple.TridentTuple; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 16:33
* <p>
* Company: 百趣
* <p>
* 结果记录函数
* @author tangzhe
* @version 1.0.0
*/
public class OperatePrintFunction extends BaseFunction {

    /**
     * Prints whether the Mongo write succeeded, based on tuple field {@code "result"}.
     * Any value other than "success" (compared case-insensitively) is reported as a
     * failure. Nothing is emitted to the collector.
     *
     * @param input     tuple carrying the {@code "result"} field
     * @param collector unused
     */
    @Override
    public void execute(TridentTuple input, TridentCollector collector) {
        final boolean succeeded = "success".equalsIgnoreCase(input.getStringByField("result"));
        final String outcome = succeeded ? "->: 插入mongo成功" : "->: 插入mongo失败";
        System.out.println(this.getClass().getSimpleName() + outcome);
    }
}
import org.apache.storm.mongodb.trident.state.MongoState;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.BaseStateUpdater;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values; import java.util.List; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 16:29
* <p>
* Company: 百趣
* <p>
*
* @author tangzhe
* @version 1.0.0
*/
// State updater that delegates the write to MongoState and then emits a single
// one-field status tuple ("success" or "fail") for the call.
public class MyMongoStateUpdater extends BaseStateUpdater<MongoState> { @Override
// Writes the tuples through state.updateState and reports the outcome via the collector.
public void updateState(MongoState state, List<TridentTuple> tuples,
TridentCollector collector) {
try {
state.updateState(tuples, collector);
// Reached only when the delegate call returned without throwing.
collector.emit(new Values("success"));
} catch (Exception e) {
// Any exception raised inside the try (the write or the success emit) is printed
// and turned into a "fail" status tuple; it is not rethrown to the caller.
e.printStackTrace();
collector.emit(new Values("fail"));
}
}
}import com.google.common.collect.Lists;
import com.mongodb.client.model.Filters;
import org.apache.commons.lang.Validate;
import org.apache.storm.mongodb.common.MongoDBClient;
import org.apache.storm.mongodb.common.mapper.MongoMapper;
import org.apache.storm.mongodb.trident.state.MongoState;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.State;
import org.apache.storm.trident.tuple.TridentTuple;
import org.bson.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import java.io.Serializable;
import java.util.List;
import java.util.Map; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
public class OperateMongoState implements State {

    // Bound to this class (previously MongoState.class, which mislabelled the log output).
    private static final Logger LOG = LoggerFactory.getLogger(OperateMongoState.class);

    private final OperateMongoState.Options options;
    private MongoDBClient mongoClient;
    private final Map map;

    /**
     * @param map     configuration map handed over by the state factory (stored, not read here)
     * @param options connection and mapping options used by {@link #prepare()} and
     *                {@link #updateState(List, TridentCollector)}
     */
    protected OperateMongoState(Map map, OperateMongoState.Options options) {
        this.options = options;
        this.map = map;
    }

    /** Fluent, serializable holder for the Mongo URL, the collection name and the tuple mapper. */
    public static class Options implements Serializable {
        private String url;
        private String collectionName;
        private MongoMapper mapper;

        public OperateMongoState.Options withUrl(String url) {
            this.url = url;
            return this;
        }

        public OperateMongoState.Options withCollectionName(String collectionName) {
            this.collectionName = collectionName;
            return this;
        }

        public OperateMongoState.Options withMapper(MongoMapper mapper) {
            this.mapper = mapper;
            return this;
        }
    }

    /**
     * Validates the options and creates the Mongo client.
     * The validation calls fail when the url or collection name is empty or the mapper is null.
     */
    protected void prepare() {
        Validate.notEmpty(options.url, "url can not be blank or null");
        Validate.notEmpty(options.collectionName, "collectionName can not be blank or null");
        Validate.notNull(options.mapper, "MongoMapper can not be null");

        this.mongoClient = new MongoDBClient(options.url, options.collectionName);
    }

    @Override
    public void beginCommit(Long txid) {
        LOG.debug("beginCommit is noop.");
    }

    @Override
    public void commit(Long txid) {
        LOG.debug("commit is noop.");
    }

    /**
     * Upserts one document per tuple, matching on the tuple's {@code logDate} field and
     * applying the mapped document through {@code $set}.
     *
     * <p>Every tuple of the batch is written; earlier code mapped all tuples but wrote
     * only the first one. A null or empty batch is a no-op.
     *
     * @param tuples    tuples of the current batch
     * @param collector unused
     */
    public void updateState(List<TridentTuple> tuples, TridentCollector collector) {
        if (tuples == null || tuples.isEmpty()) {
            return;
        }
        for (TridentTuple tuple : tuples) {
            Document document = options.mapper.toDocument(tuple);
            // getValueByField instead of getStringByField: the match value is passed
            // through with its own type, so a non-String logDate (e.g. a Date) no
            // longer fails with a ClassCastException.
            this.mongoClient.update(
                    Filters.eq("logDate", tuple.getValueByField("logDate")),
                    new Document("$set", document), true);
        }
    }
}
import org.apache.storm.task.IMetricsContext;
import org.apache.storm.trident.state.State;
import org.apache.storm.trident.state.StateFactory; import java.util.Map; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
public class OperateMongoStateFactory implements StateFactory {

    /** Options passed to every state created by this factory. */
    private OperateMongoState.Options options;

    /**
     * @param options options forwarded to each {@link OperateMongoState}
     */
    public OperateMongoStateFactory(OperateMongoState.Options options) {
        this.options = options;
    }

    /**
     * Creates an {@link OperateMongoState} from the given configuration and the stored
     * options, calls {@code prepare()} on it and returns it.
     * The metrics context and the partition arguments are not used.
     */
    @Override
    public State makeState(Map conf, IMetricsContext metrics,
                           int partitionIndex, int numPartitions) {
        final OperateMongoState prepared = new OperateMongoState(conf, this.options);
        prepared.prepare();
        return prepared;
    }
}
package net.baiqu.storm.trident.state; import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.BaseStateUpdater;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values; import java.util.List; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
public class OperateMongoStateUpdater extends BaseStateUpdater<OperateMongoState> {

    /**
     * Hands the batch to the state, then emits the {@code userId} of the first tuple
     * as a single-value tuple.
     *
     * @param state     state that performs the write
     * @param tuples    tuples of the current batch; the first element is read after the write
     * @param collector receives the emitted userId
     */
    @Override
    public void updateState(OperateMongoState state, List<TridentTuple> tuples, TridentCollector collector) {
        state.updateState(tuples, collector);
        final TridentTuple head = tuples.get(0);
        collector.emit(new Values(head.getStringByField("userId")));
    }
}
package net.baiqu.storm.trident.topology; import kafka.api.OffsetRequest;
import net.baiqu.storm.trident.filter.FormatFilter;
import net.baiqu.storm.trident.function.OperateLogParseFunction;
import net.baiqu.storm.trident.function.OperatePrintFunction;
import net.baiqu.storm.trident.state.MyMongoStateUpdater;
import net.baiqu.storm.trident.util.TridentMongoFactory;
import net.baiqu.storm.utils.Constants;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.kafka.trident.TransactionalTridentKafkaSpout;
import org.apache.storm.kafka.trident.TridentKafkaConfig;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.tuple.Fields; /**
* <p>
* Date-Time: 2018/09/10 13:50
* Company: 百趣
* </p>
*
* @author tangzhe
* @version 1.0.0
*/
public class OperateLogTridentTopology {

    /** Topology name used in demo mode, and as the fallback when no name is passed on the command line. */
    private static final String DEFAULT_TOPOLOGY_NAME = "operateLogTridentTopology";

    /** Fields produced by the parse function and consumed by the persist step (kept in one place). */
    private static final String[] LOG_FIELDS = {
        "appId", "host", "requestURI", "method", "ip",
        "requestTime", "userId", "username", "role", "memo", "logDate"
    };

    /**
     * Builds the topology (Kafka spout, format filter, log parser, Mongo persist, result
     * printer) and submits it: to a {@link LocalCluster} when {@code Constants.MODE} is
     * "demo" (case-insensitive), otherwise through {@link StormSubmitter}.
     *
     * @param args optional; {@code args[0]} is the topology name for cluster submission.
     *             When absent, {@code "operateLogTridentTopology"} is used.
     */
    public static void main(String[] args) {
        TridentTopology topology = new TridentTopology();

        BrokerHosts hosts = new ZkHosts(Constants.ZK_HOSTS);
        String topic = Constants.KAFKA_LOG_TOPIC;
        String id = Constants.KAFKA_SPOUT_ID;

        TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(hosts, topic, id);
        kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

        // In demo mode, start reading from the latest offset.
        if (StringUtils.equalsIgnoreCase("demo", Constants.MODE)) {
            kafkaConfig.startOffsetTime = OffsetRequest.LatestTime();
        }

        TransactionalTridentKafkaSpout kafkaSpout = new TransactionalTridentKafkaSpout(kafkaConfig);

        Stream stream = topology.newStream("kafkaSpout", kafkaSpout).parallelismHint(1);
        stream.shuffle().each(new Fields("str"), new FormatFilter())
                .parallelismHint(1)
                .shuffle().each(new Fields("str"), new OperateLogParseFunction(),
                        new Fields(LOG_FIELDS))
                .parallelismHint(1)
                .partitionPersist(TridentMongoFactory.getMongoInsertState(),
                        new Fields(LOG_FIELDS),
                        new MyMongoStateUpdater(),
                        new Fields("result"))
                .parallelismHint(1)
                .newValuesStream().shuffle().each(
                        new Fields("result"), new OperatePrintFunction(), new Fields("none"))
                .parallelismHint(1);

        Config config = new Config();
        if (StringUtils.equalsIgnoreCase("demo", Constants.MODE)) {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology(DEFAULT_TOPOLOGY_NAME, config, topology.build());
        } else {
            config.setNumWorkers(1);
            config.put(Config.NIMBUS_HOST, Constants.NIMBUS_HOST);
            config.put(Config.NIMBUS_THRIFT_PORT, Constants.NIMBUS_THRIFT_PORT);
            config.put(Config.TOPOLOGY_ACKER_EXECUTORS, 1);
            // Without this guard a missing argument only surfaced as a printed
            // ArrayIndexOutOfBoundsException and nothing was submitted.
            String topologyName = (args != null && args.length > 0) ? args[0] : DEFAULT_TOPOLOGY_NAME;
            try {
                StormSubmitter.submitTopology(topologyName, config, topology.build());
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
package net.baiqu.storm.trident.util; import net.baiqu.storm.trident.state.OperateMongoState;
import net.baiqu.storm.trident.state.OperateMongoStateFactory;
import net.baiqu.storm.utils.Constants;
import org.apache.commons.lang.StringUtils;
import org.apache.storm.mongodb.common.mapper.MongoMapper;
import org.apache.storm.mongodb.common.mapper.SimpleMongoMapper;
import org.apache.storm.mongodb.trident.state.MongoState;
import org.apache.storm.mongodb.trident.state.MongoStateFactory;
import org.apache.storm.trident.state.StateFactory; /**
* <p>
* Copyright: Copyright (c) 2018/9/10 14:56
* <p>
* Company: 百趣
* <p>
* trident mongo 工厂类
* @author tangzhe
* @version 1.0.0
*/
public class TridentMongoFactory {

    /**
     * Connection URL with credentials.
     * NOTE(review): only '@' in the password is percent-encoded; other reserved
     * characters are passed through unchanged.
     */
    public static final String URL = "mongodb://" + Constants.MONGODB_USERNAME + ":"
            + Constants.MONGODB_PASSWORD.replace("@", "%40")
            + "@" + Constants.MONGODB_HOSTS + ":" + Constants.MONGODB_PORT + "/"
            + Constants.MONGODB_DATABASE + "?connectTimeoutMS=" + Constants.MONGODB_TIMEOUT;

    /** Connection URL without credentials. */
    public static final String URL2 = "mongodb://" + Constants.MONGODB_HOSTS + ":" + Constants.MONGODB_PORT + "/"
            + Constants.MONGODB_DATABASE + "?connectTimeoutMS=" + Constants.MONGODB_TIMEOUT;

    /** Value passed as the collection name to both state option builders. */
    public static final String OPERATE_LOG_DB = "operate_log";

    /**
     * Builds a state factory for inserts using the stock {@link MongoState}.
     *
     * @return a {@link MongoStateFactory} configured with the URL, collection and field mapper
     */
    public static StateFactory getMongoInsertState() {
        final String target = getUrl();
        final MongoState.Options insertOptions = new MongoState.Options()
                .withUrl(target)
                .withCollectionName(OPERATE_LOG_DB)
                .withMapper(newLogMapper());
        return new MongoStateFactory(insertOptions);
    }

    /**
     * Builds a state factory for updates using the custom {@link OperateMongoState}.
     *
     * @return an {@link OperateMongoStateFactory} configured with the URL, collection and field mapper
     */
    public static StateFactory getMongoUpdateState() {
        final String target = getUrl();
        final OperateMongoState.Options updateOptions = new OperateMongoState.Options()
                .withUrl(target)
                .withCollectionName(OPERATE_LOG_DB)
                .withMapper(newLogMapper());
        return new OperateMongoStateFactory(updateOptions);
    }

    /** Creates the mapper that copies the eleven log fields from a tuple into a document. */
    private static MongoMapper newLogMapper() {
        return new SimpleMongoMapper()
                .withFields("appId", "host", "requestURI", "method", "ip", "requestTime",
                        "userId", "username", "role", "memo", "logDate");
    }

    /**
     * Chooses the Mongo URL: the credentialed form when a username is configured
     * (not blank), the plain form otherwise.
     */
    private static String getUrl() {
        return StringUtils.isNotBlank(Constants.MONGODB_USERNAME) ? URL : URL2;
    }
}

Trident整合MongoDB的更多相关文章

  1. spring MVC 整合mongodb

    Spring Mongodb 目录 1 SPRING整合MONGODB 1 1.1 环境准备 1 1.2 包依赖 1 1.3 配置 2 2 案列 5 2.1 SPRING MVC整合MONGODB代码 ...

  2. MongoDB系列:四、spring整合mongodb,带用户验证

    在前面的两篇博客 MongoDB常用操作练习.springboot整合mongoDB的简单demo中,我们基本上熟悉了mongodb,也把它与spring boot进行了整合并且简单使用.在本篇博客中 ...

  3. java操作mongodb & springboot整合mongodb

    简单的研究原生API操作MongoDB以及封装的工具类操作,最后也会研究整合spring之后作为dao层的完整的操作. 1.原生的API操作 pom.xml <!-- https://mvnre ...

  4. SpringBoot整合mongoDB

    MongoDB 是一个介于关系数据库和非关系数据库之间的产品,是非关系数据库当中功能最丰富,最像关系数据库的. 这一片文章介绍一个springboot整合mongodb,如果你了解整合mysql之类的 ...

  5. springboot 学习之路 14(整合mongodb的Api操作)

    springboot整合mongodb: mongodb的安装和权限配置  请点击连接参考 mongodb集成 : 第一步:引如pom文件 第二步:配置文件配置mongodb路径: 第三步:关于mon ...

  6. SpringMVC整合Mongodb开发,高级操作

    开发环境: 操作系统:windows xpMongodb:2.0.6依 赖 包:Spring3.2.2 + spring-data-mongodb-1.3.0 + Spring-data-1.5 +  ...

  7. spring整合mongodb

    使用spring整合mongodb maven 依赖 <dependency> <groupId>org.mongodb</groupId> <artifac ...

  8. SpringBoot非官方教程 | 第八篇:springboot整合mongodb

    转载请标明出处: 原文首发于:https://www.fangzhipeng.com/springboot/2017/07/11/springboot8-mongodb/ 本文出自方志朋的博客 这篇文 ...

  9. Trident整合Kafka

    首先编写一个打印函数KafkaPrintFunction import org.apache.storm.trident.operation.BaseFunction; import org.apac ...

随机推荐

  1. phpStudy-FTP_Server插件安装使用教程

    FileZilla Server使用教程 ftp server安装教程 除了phpStudy for IIS外其他版本phpStudy不再集成ftp server外. phpStudy for IIS ...

  2. POJ-2481 Cows---树状数组的运用

    题目链接: https://vjudge.net/problem/POJ-2481 题目大意: if Si <= Sj and Ej <= Ei and Ei - Si > Ej - ...

  3. 前端高质量知识(二)-JS执行上下文(执行环境)详细图解Script

    先随便放张图 我们在JS学习初期或者面试的时候常常会遇到考核变量提升的思考题.比如先来一个简单一点的. console.log(a); // 这里会打印出什么? var a = 20; PS: 变量提 ...

  4. Poj(1703),种类并查集

    题目链接:http://poj.org/problem?id=1703 已经不是第一次接触种类并查集了,直到今天才搞懂. 感谢红黑联盟,感谢杰哥!!! 每个节点只要关系确定,不管是不是同一个集合里面, ...

  5. 【转】startActivityForResult和setResult详解

    startActivityForResult与startActivity的不同之处在于:1.startActivity( ) 仅仅是跳转到目标页面,若是想跳回当前页面,则必须再使用一次startAct ...

  6. gearman安装实录

    花了5个小时装好了gearman,问题不断,坑爹的服务器yum还坏了,悲催. 服务器系统:centos5.3 64位 gearman版本:1.1.8 安装包(相关依赖)下载 1.gearman安装包 ...

  7. Docker中的三个基本概念容器(container)、镜像(image)和仓库(registry)之间有什么关系?

    Docker镜像是一个特殊的文件系统,除了提供容器运行时所需的程序.库.资源.配置等文件外,还包含了一些为运行时准备的一些配置参数(如匿名卷.环境变量.用户等).镜像不包含任何动态数据,其内容在构建之 ...

  8. 用到UdpClient的一点经验

    Thread.Abort对UdpClient.Receive阻塞的线程无效 http://computer-programming-forum.com/4-csharp/184f9d4ee63704f ...

  9. currency 过滤器

    <!DOCTYPE html><html><head><meta http-equiv="Content-Type" content=&q ...

  10. dubbo 与Spring Cloud 对比

    链接:https://www.zhihu.com/question/45413135/answer/242224410 近期也看到一些分享Spring Cloud的相关实施经验,这对于最近正在整理Sp ...