kafka 自定义分区器

package cn.xiaojf.kafka.producer;

import org.apache.kafka.clients.producer.Partitioner;

import org.apache.kafka.common.Cluster;

import org.apache.kafka.common.PartitionInfo;

import org.apache.kafka.common.utils.Utils;

import java.util.List;

import java.util.Map;

import java.util.Random;

import java.util.concurrent.ConcurrentHashMap;

import java.util.concurrent.ConcurrentMap;

import java.util.concurrent.atomic.AtomicInteger;

/**

 * 自定义分区方式

 */

public class CustomPartitioner implements Partitioner {

    private final ConcurrentMap<String, AtomicInteger> topicCounterMap = new ConcurrentHashMap();

    public CustomPartitioner() {

    }

    public void configure(Map<String, ?> configs) {

    }

    /**

     * 自定义分区规则

     * @param topic

     * @param key

     * @param keyBytes

     * @param value

     * @param valueBytes

     * @param cluster

     * @return

     */

    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {

        List partitions = cluster.partitionsForTopic(topic);

        int numPartitions = partitions.size();

        if(keyBytes == null) {

            int nextValue = this.nextValue(topic);

            List availablePartitions = cluster.availablePartitionsForTopic(topic);

            if(availablePartitions.size() > 0) {

                int part = Utils.toPositive(nextValue) % availablePartitions.size();

                return ((PartitionInfo)availablePartitions.get(part)).partition();

            } else {

                return Utils.toPositive(nextValue) % numPartitions;

            }

        } else {

            return Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;

        }

    }

    private int nextValue(String topic) {

        AtomicInteger counter = (AtomicInteger)this.topicCounterMap.get(topic);

        if(null == counter) {

            counter = new AtomicInteger((new Random()).nextInt());

            AtomicInteger currentCounter = (AtomicInteger)this.topicCounterMap.putIfAbsent(topic, counter);

            if(currentCounter != null) {

                counter = currentCounter;

            }

        }

        return counter.getAndIncrement();

    }

    public void close() {

    }

}

package cn.xiaojf.kafka.producer;

import org.apache.kafka.clients.producer.*;

import org.apache.kafka.common.Cluster;

import org.apache.kafka.common.PartitionInfo;

import org.apache.kafka.common.serialization.StringSerializer;

import org.apache.kafka.common.utils.Utils;

import java.util.*;

import java.util.concurrent.ConcurrentHashMap;

import java.util.concurrent.ConcurrentMap;

import java.util.concurrent.atomic.AtomicInteger;

/**

 * 消息生产者

 * @author xiaojf 2017/3/22 14:27

 */

public class MsgProducer extends Thread {

    private final KafkaProducer<String, String> producer;

    private final String topic;

    private final boolean isAsync;

    public MsgProducer(String topic, boolean isAsync) {

        Properties properties = new Properties();

        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.59.130:9092");//broker 集群地址

        properties.put(ProducerConfig.CLIENT_ID_CONFIG, "MsgProducer");//自定义客户端id

        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");//key 序列号方式

        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");//value 序列号方式

        properties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG,CustomPartitioner.class.getCanonicalName());//自定义分区函数

//        properties.load("properties配置文件");

        this.producer = new KafkaProducer<String, String>(properties);

        this.topic = topic;

        this.isAsync = isAsync;

    }

    @Override

    public void run() {

        int msgNo = 0;

        while (true) {

            String msg = "Msg: " + msgNo;

            String key = msgNo + "";

            if (isAsync) {//异步

                producer.send(new ProducerRecord<String, String>(this.topic,msg));

//                producer.send(new ProducerRecord<String, String>(this.topic, key, msg));

            } else {//同步

                producer.send(new ProducerRecord<String, String>(this.topic, key, msg),

                        new MsgProducerCallback(System.currentTimeMillis(), key, msg));

            }

        }

    }

    /**

     * 消息发送后的回调函数

     */

    class MsgProducerCallback implements Callback {

        private final long startTime;

        private final String key;

        private final String msg;

        public MsgProducerCallback(long startTime, String key, String msg) {

            this.startTime = startTime;

            this.key = key;

            this.msg = msg;

        }

        public void onCompletion(RecordMetadata recordMetadata, Exception e) {

            long elapsedTime = System.currentTimeMillis() - startTime;

            if (recordMetadata != null) {

                System.out.println(msg + " be sended to partition no : " + recordMetadata.partition());

            }

        }

    }

    public static void main(String args[]) {

        new MsgProducer("my-replicated-topic",true).start();//开始发送消息

    }

}

kafka 自定义分区器的更多相关文章

kafka自定义序列化器
<kafka权威指南> Customer.java public class Customer { private int customId; private String custome ...
spark自定义分区器实现
在 Spark 中,框架默认使用的是 HashPartitioner 分区器对 RDD 进行分区,但是在实际生产中,使用 Spark 自带的分区器往往会产生数据倾斜等问题,这个时候就需要我们自定义分区,按照我们指定的字 ...
MapReduce之自定义分区器Partitioner
@ 目录问题引出默认Partitioner分区自定义Partitioner步骤 Partition分区案例实操分区总结问题引出要求将统计结果按照条件输出到不同文件中(分区). 比如:将统计 ...
玩转Kafka的生产者——分区器与多线程
上篇文章学习kafka的基本安装和基础概念,本文主要是学习kafka的常用API.其中包括生产者和消费者, 多线程生产者,多线程消费者,自定义分区等,当然还包括一些避坑指南. 首发于个人网站:链接地址 ...
kafka producer partitions分区器(七)
消息在经过拦截器.序列化后,就需要确定它发往哪个分区,如果在ProducerRecord中指定了partition字段,那么就不再需要partitioner分区器进行分区了,如果没有指定,那么会根据k ...
Spark源码分析之分区器的作用
最近因为手抖,在Spark中给自己挖了一个数据倾斜的坑.为了解决这个问题,顺便研究了下Spark分区器的原理,趁着周末加班总结一下~ 先说说数据倾斜数据倾斜是指Spark中的RDD在计算的时候,每个 ...
RDD(六)——分区器
RDD的分区器 Spark目前支持Hash分区和Range分区,用户也可以自定义分区,Hash分区为当前的默认分区,Spark中分区器直接决定了RDD中分区的个数.RDD中每条数据经过Shuffle过 ...
Kafka的接口回调 +自定义分区、拦截器
一.接口回调+自定义分区 1.接口回调:在使用生产者的send方法时添加Callback回调 producer.send(new ProducerRecord<String, String> ...
【Kafka】自定义分区策略
自定义分区策略思路 Command+Option+shift+N 调出查询页面,找到producer包的Partitioner接口 Partitioner下有一个DefaultPartitioner ...

随机推荐

基于Spring Security 的JSaaS应用的权限管理
1. 概述权限管理,一般指根据系统设置的安全规则或者安全策略,用户可以访问而且只能访问自己被授权的资源.资源包括访问的页面,访问的数据等,这在传统的应用系统中比较常见.本文介绍的则是基于Saas系统 ...
alert 和 console.log的区别
出走半月,一直以为 console.log 和 alert 的用法是一样的,只是表现的形式不同,alert 是以弹框的形式出现,console.log 是在后台打印输出. 但是今天在写东西的时候,发现 ...
transform-origin 的定位
transform-origin接受两个参数,它们可以是百分比,em,px等具体的值,也可以是left,center,right,或者 top,center,bottom等描述性参数,第一个参数表示X ...
为部署ASP.NET Core准备：使用Hyper-V安装Ubuntu Server 16.10
概述 Hyper-V是微软的一款虚拟化产品,和VMWare一样采用的hypervisor技术.它已经被内嵌到Win10系统内,我们只需要进行简单的安装即可.但是前提是要确保你的机器已经启用虚拟化,可以 ...
Spring事务管理的实现方式之编程式事务与声明式事务详解
原创说明:本博文为原创作品,绝非他处转载,转载请联系博主 1.上篇文章讲解了Spring事务的传播级别与隔离级别,以及分布式事务的简单配置,点击回看上篇文章 2.编程式事务:编码方式实现事务管理(代码 ...
JS模式--装饰者模式
在Javascript中动态的给对象添加职责的方式称作装饰者模式. 下面我们通常遇到的例子: var a = function () { alert(1); };//改成: var a = funct ...
基于python的知乎开源爬虫 zhihu_oauth使用介绍
今天在无意之中发现了一个知乎的开源爬虫,是基于Python的,名字叫zhihu_oauth,看了一下在github上面star数还挺多的,貌似文档也挺详细的,于是就稍微研究了一下.发现果然很好用啊.就 ...
Failed to read artifact descriptor for xxx:jar 的Maven项目jar包依赖配置的问题解决
在开发的过程中,尤其是新手,我们经常遇到Maven下载依赖jar包的问题,也就是遇到“Failed to read artifact descriptor for xxx:jar”的错误. 对于这种非 ...
使用IntelliJ IDEA开发SpringMVC网站（二）开发环境
访问GitHub下载最新源码:https://github.com/gaussic/SpringMVCDemo 文章已针对IDEA 2016做了一定的更新,部分更新较为重要,请重新阅读文章并下载最新源 ...
（原创）如何在性能测试中自动生成并获取Oracle AWR报告
版权声明:本文为原创文章,转载请先联系并标明出处由于日常使用最多的数据库为Oracle,因此,最近又打起了Oracle的AWR报告的主意. 过去我们执行测试,都是执行开始和结束分别手动建立一个快照, ...

kafka 自定义分区器

kafka 自定义分区器的更多相关文章

随机推荐

热门专题