HBase之缓存扫描加快读取速度

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.TableName;

import org.apache.hadoop.hbase.client.*;

import org.apache.hadoop.hbase.client.metrics.ScanMetrics;

import java.io.IOException;

/**
 * Demo that scans {@code testtable} repeatedly with different caching / batch /
 * small-scan settings and prints how many results and RPC calls each combination
 * produced.
 *
 * Created by similarface on 16/8/23.
 */
public class ScanDataUseCache {

    /** Connection backing {@link #table}; retained so it can be closed when the demo ends. */
    private static Connection connection = null;

    /** Lazily created handle to {@code testtable}, reused by every scan in this class. */
    private static Table table = null;

    /**
     * Returns the shared handle to {@code testtable}, opening the connection on first use.
     *
     * <p>The handle is stored in a static field so that repeated calls reuse one
     * connection instead of opening (and leaking) a new one each time.
     *
     * @return the table handle, or {@code null} if the connection or table could not be opened
     */
    public static Table getTable() {
        if (table == null) {
            Connection created = null;
            try {
                Configuration configuration = HBaseConfiguration.create();
                created = ConnectionFactory.createConnection(configuration);
                // Open the table and remember both handles for later calls.
                table = created.getTable(TableName.valueOf("testtable"));
                connection = created;
            } catch (IOException e) {
                // Report the failure instead of hiding it; callers still receive null.
                System.err.println(e);
                if (created != null) {
                    try {
                        created.close();
                    } catch (IOException closeFailure) {
                        System.err.println(closeFailure);
                    }
                }
            }
        }
        return table;
    }

    /** Closes the shared table and connection, if they were opened, and resets both fields. */
    private static void closeResources() {
        try {
            if (table != null) {
                table.close();
            }
        } catch (IOException e) {
            System.err.println(e);
        } finally {
            table = null;
        }
        try {
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            System.err.println(e);
        } finally {
            connection = null;
        }
    }

    /**
     * Runs one full scan with the given settings and prints the result and RPC counts.
     *
     * @param caching value passed to {@code Scan.setCaching}: rows requested per RPC
     * @param batch   value passed to {@code Scan.setBatch}: columns returned per Result
     * @param small   value passed to {@code Scan.setSmall}: whether this is a small scan
     */
    private static void scan(int caching, int batch, boolean small) {
        // setCaching: number of rows requested per RPC. A larger value means fewer round
        //             trips, but each single transfer takes longer.
        // setBatch:   number of columns returned per Result. Very wide rows are split so
        //             that only a few columns of a row are shipped to the client at a time.
        // setSmall:   marks the scan as a small scan.
        // setScanMetricsEnabled: enables collection of scan metrics, which is where the
        //             RPC count printed below comes from.
        Scan scan = new Scan()
                .setCaching(caching)
                .setBatch(batch)
                .setSmall(small)
                .setScanMetricsEnabled(true);

        Table source = getTable();
        if (source == null) {
            System.out.println("Error");
            return;
        }

        int count = 0;
        // try-with-resources closes the scanner even if iteration fails part-way.
        try (ResultScanner scanner = source.getScanner(scan)) {
            for (Result ignored : scanner) {
                count++;
            }
        } catch (IOException e) {
            System.out.println(e);
            System.out.println("Error");
            return;
        }

        // Metrics are read only after the scanner has been closed, as in the original flow.
        ScanMetrics metrics = scan.getScanMetrics();
        String rpcCalls = metrics == null ? "n/a" : String.valueOf(metrics.countOfRPCcalls);
        System.out.println("Caching: " + caching + ", Batch: " + batch + ", Small: " + small
                + ", Results: " + count + ", RPCs: " + rpcCalls);
    }

    /**
     * Runs the scan with a series of caching/batch/small combinations.
     *
     * <p>The comment above each call is the output the original author recorded for that
     * call; the numbers depend on the contents of the table being scanned.
     *
     * @param args unused
     * @throws IOException declared for compatibility; failures are reported on the console
     */
    public static void main(String[] args) throws IOException {
        try {
            // Caching: 1, Batch: 1, Small: false, Results: 9, RPCs: 12
            scan(1, 1, false);
            // Caching: 1, Batch: 0, Small: false, Results: 4, RPCs: 7
            scan(1, 0, false);
            // Caching: 1, Batch: 0, Small: true, Results: 4, RPCs: 0
            scan(1, 0, true);
            // Caching: 200, Batch: 1, Small: false, Results: 9, RPCs: 3
            scan(200, 1, false);
            // Caching: 200, Batch: 0, Small: false, Results: 4, RPCs: 3
            scan(200, 0, false);
            // Caching: 200, Batch: 0, Small: true, Results: 4, RPCs: 0
            scan(200, 0, true);
            // Caching: 2000, Batch: 100, Small: false, Results: 4, RPCs: 3
            scan(2000, 100, false);
            // Caching: 2, Batch: 100, Small: false, Results: 4, RPCs: 5
            scan(2, 100, false);
            // Caching: 2, Batch: 10, Small: false, Results: 4, RPCs: 5
            scan(2, 10, false);
            // Caching: 5, Batch: 100, Small: false, Results: 4, RPCs: 3
            scan(5, 100, false);
            // (no sample output was recorded for this combination)
            scan(5, 20, false);
            // Caching: 10, Batch: 10, Small: false, Results: 4, RPCs: 3
            scan(10, 10, false);
        } finally {
            closeResources();
        }
    }
}

/**

 Caching: 1, Batch: 0, Small: false, Results: 5, RPCs: 8

 Caching: 1, Batch: 0, Small: true, Results: 5, RPCs: 0

 Caching: 200, Batch: 1, Small: false, Results: 1009, RPCs: 8

 Caching: 200, Batch: 0, Small: false, Results: 5, RPCs: 3

 Caching: 200, Batch: 0, Small: true, Results: 5, RPCs: 0

 Caching: 2000, Batch: 100, Small: false, Results: 14, RPCs: 3

 Caching: 2, Batch: 100, Small: false, Results: 14, RPCs: 10

 Caching: 2, Batch: 10, Small: false, Results: 104, RPCs: 55

 Caching: 5, Batch: 100, Small: false, Results: 14, RPCs: 5

 Caching: 5, Batch: 20, Small: false, Results: 54, RPCs: 13

 Caching: 10, Batch: 10, Small: false, Results: 104, RPCs: 13

 **/

这是一个包含 9 行数据的表，每行包含若干列。使用缓存（caching）为 6、批量（batch）为 3 的扫描器时，需要 3 个 RPC：每 3 个列装入一个 Result 实例，每 6 个 Result 放入缓存组成一个 RPC。

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.TableName;

import org.apache.hadoop.hbase.client.*;

import org.apache.hadoop.hbase.client.metrics.ScanMetrics;

import java.io.IOException;

/**
 * Demo that scans {@code testtable} with different per-column-family offsets, per-column-family
 * result limits and maximum result sizes, printing the number of results and RPC calls.
 *
 * Created by similarface on 16/8/24.
 */
public class ScanWithOffsetAndLimit {

    /** Connection backing {@link #table}; retained so it can be closed when the demo ends. */
    private static Connection connection = null;

    /** Lazily created handle to {@code testtable}, reused by every scan in this class. */
    private static Table table = null;

    /**
     * Returns the shared handle to {@code testtable}, opening the connection on first use.
     *
     * <p>The handle is stored in a static field so that repeated calls reuse one
     * connection instead of opening (and leaking) a new one each time.
     *
     * @return the table handle, or {@code null} if the connection or table could not be opened
     */
    public static Table getTable() {
        if (table == null) {
            Connection created = null;
            try {
                Configuration configuration = HBaseConfiguration.create();
                created = ConnectionFactory.createConnection(configuration);
                // Open the table and remember both handles for later calls.
                table = created.getTable(TableName.valueOf("testtable"));
                connection = created;
            } catch (IOException e) {
                // Report the failure instead of hiding it; callers still receive null.
                System.err.println(e);
                if (created != null) {
                    try {
                        created.close();
                    } catch (IOException closeFailure) {
                        System.err.println(closeFailure);
                    }
                }
            }
        }
        return table;
    }

    /** Closes the shared table and connection, if they were opened, and resets both fields. */
    private static void closeResources() {
        try {
            if (table != null) {
                table.close();
            }
        } catch (IOException e) {
            System.err.println(e);
        } finally {
            table = null;
        }
        try {
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            System.err.println(e);
        } finally {
            connection = null;
        }
    }

    /**
     * Iterates over the table with the given scan settings and prints a summary line.
     *
     * @param num           sequence number of this run, used only in the progress message
     * @param caching       value passed to {@code Scan.setCaching}
     * @param batch         value passed to {@code Scan.setBatch}
     * @param offset        value passed to {@code Scan.setRowOffsetPerColumnFamily}
     * @param maxResults    value passed to {@code Scan.setMaxResultsPerColumnFamily}
     * @param maxResultSize value passed to {@code Scan.setMaxResultSize}
     * @param dump          when {@code true}, every Result is printed as it is read
     * @throws IOException if the table cannot be opened or the scanner cannot be created
     */
    private static void scan(int num, int caching, int batch, int offset, int maxResults, int maxResultSize, boolean dump
    ) throws IOException {
        Scan scan = new Scan()
                .setCaching(caching)
                .setBatch(batch)
                .setRowOffsetPerColumnFamily(offset)
                .setMaxResultsPerColumnFamily(maxResults)
                .setMaxResultSize(maxResultSize)
                .setScanMetricsEnabled(true);

        Table source = getTable();
        if (source == null) {
            // Fail with the declared exception type rather than a NullPointerException.
            throw new IOException("Unable to open table testtable");
        }

        int count = 0;
        // try-with-resources closes the scanner even if iteration fails part-way.
        try (ResultScanner scanner = source.getScanner(scan)) {
            System.out.println("Scan #" + num + " running...");
            for (Result result : scanner) {
                count++;
                if (dump) {
                    System.out.println("Result [" + count + "]:" + result);
                }
            }
        }

        // Metrics are read only after the scanner has been closed, as in the original flow.
        ScanMetrics metrics = scan.getScanMetrics();
        String rpcCalls = metrics == null ? "n/a" : String.valueOf(metrics.countOfRPCcalls);
        System.out.println("Caching: " + caching + ", Batch: " + batch +
                ", Offset: " + offset + ", maxResults: " + maxResults +
                ", maxSize: " + maxResultSize + ", Results: " + count +
                ", RPCs: " + rpcCalls);
    }

    /**
     * Runs six scans with different offset / limit / size settings.
     *
     * @param args unused
     * @throws IOException if a scan cannot be started
     */
    public static void main(String[] args) throws IOException {
        try {
            // Offset 0, at most 2 cells: per the original author this returns columns 1 and 2.
            scan(1, 11, 0, 0, 2, -1, true);
            // Offset 4, at most 2 cells: per the original author this returns columns 5 and 6.
            scan(2, 11, 0, 4, 2, -1, true);
            scan(3, 5, 0, 0, 2, -1, false);
            scan(4, 11, 2, 0, 5, -1, true);
            scan(5, 11, -1, -1, -1, 1, false);
            scan(6, 11, -1, -1, -1, 10000, false);
        } finally {
            closeResources();
        }
    }
}

/**

 Caching: 11, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 458

 Caching: 11, Batch: 0, Offset: 4, maxResults: 2, maxSize: -1, Results: 1, RPCs: 3

 Caching: 5, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 1004

 Caching: 11, Batch: 2, Offset: 0, maxResults: 5, maxSize: -1, Results: 5009, RPCs: 458

 Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 1, Results: 5005, RPCs: 11012

 Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 10000, Results: 5005, RPCs: 469

**/

Hbase之缓存扫描加快读取速度的更多相关文章

ASP.NET状缓存Cache的应用-提高数据库读取速度
原文:ASP.NET状缓存Cache的应用-提高数据库读取速度一. Cache概述既然缓存中的数据其实是来自数据库的,那么缓存中的数据如何和数据库进行同步呢?一般来说,缓存中应该存放改 ...
优化SQLServer数据库加快查询速度
查询速度慢的原因很多,常见如下几种: 1.没有索引或者没有用到索引(这是查询慢最常见的问题,是程序设计的缺陷) 2.I/O吞吐量小,形成了瓶颈效应. 3.没有创建计算列导致查询不优化. 4.内存不足 ...
使用Openresty加快网页速度
新年快乐~~~ 上一篇文章讲到使用多级缓存来减少数据库的访问来加快网页的速度,只是,仍旧没有"嗖"一下就加载出来的感觉,想再优化一下,优化代码什么的已经到了极限.上周无意中看到了o ...
mysql千万级数据库插入速度和读取速度的调整记录
一般情况下mysql上百万数据读取和插入更新是没什么问题了,但到了上千万级就会出现很慢,下面我们来看mysql千万级数据库插入速度和读取速度的调整记录吧. 1)提高数据库插入性能中心思想:尽量将数据一 ...
数据读取速度达1.5G/s，UFS 2.1存储技术曝光
目前最快的是苹果NVME,当然UFS2.1也不差 iPhone6s与iPhone6s Plus在硬件的规格上有了很大的提升,但是它们身上的变化远没有苹果在发布会上所提到的A9处理器.1200万摄像头以 ...
160304-01、mysql数据库插入速度和读取速度的调整记录
需求:由于项目变态,需要在一个比较短时间段急剧增加数据库记录(两三天内,由于0增加至5亿).在整个过程调优过程非常艰辛思路: (1)提高数据库插入性能中心思想:尽量将数据一次性写入到Data Fil ...
mysql千万级数据库插入速度和读取速度的调整
mysql上百万数据读取和插入更新一般没什么问题,但上千万后速度会很慢,如何调整配置,提高效率.如下: 1.尽量将数据一次性写入DataFile和减少数据库的checkpoint操作,调整如下参数: ...
Linux检测硬盘读取速度
1. 清空缓存 > /proc/sys/vm/drop_caches 2. 测试读取速度 a. 将/dev/zero中数据按1M的数据单位写入testfile,共写512个单位,并不通过缓存 c ...
Android开发之制作圆形头像自定义View,直接引用工具类，加快开发速度。带有源代码学习
作者:程序员小冰,CSDN博客:http://blog.csdn.net/qq_21376985 QQ986945193 博客园主页:http://www.cnblogs.com/mcxiaobing ...

随机推荐

oracle 导出导入常见问题
oracle 导入导出常见有两种方法 EXP和IMP是客户端工具程序,它们既可以在客户端使用,也可以在服务端使用.EXPDP和IMPDP是服务端的工具程序,他们只能在ORACLE服务端使用,不能在客户 ...
An error occurred while filtering resources-----maven项目报错
解决办法需要在pom中设定jdk的版本 <plugins>  <plugin> <groupId&g ...
SqlSever基础 union 与 union all的区别，即重复项是否全部显示
镇场诗:---大梦谁觉,水月中建博客.百千磨难,才知世事无常.---今持佛语,技术无量愿学.愿尽所学,铸一良心博客.------------------------------------------ ...
SqlSever基础 group by之后，加having 对分组之后的数据在进行处理
镇场诗:---大梦谁觉,水月中建博客.百千磨难,才知世事无常.---今持佛语,技术无量愿学.愿尽所学,铸一良心博客.------------------------------------------ ...
两个乒乓球队进行比赛，各出三人。甲队为a,b,c三人，乙队为x,y,z三人。已抽签决定比赛名单。有人向队员打听比赛的名单。a说他不和x比，c说他不和x,z比，请编程序找出三队赛手的名单。
package C; public class Bisai { public static void main(String[] args) { String a="xyz",b= ...
BZOJ 3171 循环格（费用流）
题目链接:http://61.187.179.132/JudgeOnline/problem.php?id=3171 题意: 思路:若能构成循环,则每个格子的入度出度均为1.因此将每个点拆成两个点x ...
高仿bootstrap的layout效果（一）
公司研发一个新的cms,为了减少以后的修改和尽可能大程度的满足客户对cms的灵活需求,我的经理安排我去做一个与bootstrap的layout差不多的效果,这叫什么,锻炼的时候来了,加油,这个急不得一 ...
C# 多线程 Invoke BeginInvoke
Invoke在线程中等待Dispatcher调用指定方法,完成后继续下面的操作. BeginInvoke不必等待Dispatcher调用制定方法,直接继续下面的操作. 来自:百度知道这个在线程中操作 ...
Spring整合Hibernate图文步骤
首先建立java Project工程点击Finish完成添加Hibernate和Spring所需要的jar包还有Mysql连接的jar包创建Dao层,Dao层实现,Model层,Service层 ...
多命令顺序执行、管道符 ; && || |
多命令顺序执行:

Hbase之缓存扫描加快读取速度

Hbase之缓存扫描加快读取速度的更多相关文章

随机推荐

热门专题