HBase源码实战:CreateRandomStoreFile
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver; import java.io.IOException;
import java.util.Arrays;
import java.util.Random; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.io.BytesWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser; /**
* Creates an HFile with random key/value pairs.
*/
public class CreateRandomStoreFile {

  /**
   * As much as this number of bytes can be added or subtracted from key/value
   * lengths.
   */
  private static final int LEN_VARIATION = 5;

  private static final Logger LOG =
      LoggerFactory.getLogger(CreateRandomStoreFile.class);

  private static final String OUTPUT_DIR_OPTION = "o";
  private static final String NUM_KV_OPTION = "n";
  private static final String HFILE_VERSION_OPTION = "h";
  private static final String KEY_SIZE_OPTION = "k";
  private static final String VALUE_SIZE_OPTION = "v";
  private static final String COMPRESSION_OPTION = "c";
  private static final String BLOOM_FILTER_OPTION = "bf";
  private static final String BLOCK_SIZE_OPTION = "bs";
  private static final String BLOOM_BLOCK_SIZE_OPTION = "bfbs";
  private static final String INDEX_BLOCK_SIZE_OPTION = "ibs";

  /** The exit code this command-line tool returns on failure */
  private static final int EXIT_FAILURE = 1;

  /**
   * The number of valid key types in a store file: every
   * {@link KeyValue.Type} constant except two. {@link #generateKeyType}
   * skips index 0 and the last index and verifies at runtime that the
   * skipped ones are Minimum and Maximum.
   */
  private static final int NUM_VALID_KEY_TYPES =
      KeyValue.Type.values().length - 2;

  /** Command-line option definitions; populated at the start of {@link #run}. */
  private final Options options = new Options();

  /**
   * Key/value layout computed by {@link #configureKeyValue}: the number of
   * leading key bytes holding the pair index, the average key length, the
   * row and column-family lengths, and the average value length.
   */
  private int keyPrefixLen, keyLen, rowLen, cfLen, valueLen;

  /** Source of randomness; created in {@link #run} before any data is generated. */
  private Random rand;

  /**
   * Runs the tool: parses the command line, writes the requested number of
   * random key/value pairs plus 1 to 10 random meta blocks into a new store
   * file under the output directory, and logs the resulting path and size.
   *
   * @param args command-line arguments
   * @return true in case of success; false if no arguments were given (usage
   *         is printed), the command line could not be parsed, a required
   *         option is missing, or an option value is malformed
   * @throws IOException if obtaining the file system or writing the file fails
   */
  public boolean run(String[] args) throws IOException {
    options.addOption(OUTPUT_DIR_OPTION, "output_dir", true,
        "Output directory");
    options.addOption(NUM_KV_OPTION, "num_kv", true,
        "Number of key/value pairs");
    options.addOption(KEY_SIZE_OPTION, "key_size", true, "Average key size");
    options.addOption(VALUE_SIZE_OPTION, "value_size", true,
        "Average value size");
    options.addOption(HFILE_VERSION_OPTION, "hfile_version", true,
        "HFile version to create");
    options.addOption(COMPRESSION_OPTION, "compression", true,
        " Compression type, one of "
            + Arrays.toString(Compression.Algorithm.values()));
    options.addOption(BLOOM_FILTER_OPTION, "bloom_filter", true,
        "Bloom filter type, one of "
            + Arrays.toString(BloomType.values()));
    options.addOption(BLOCK_SIZE_OPTION, "block_size", true,
        "HFile block size");
    options.addOption(BLOOM_BLOCK_SIZE_OPTION, "bloom_block_size", true,
        "Compound Bloom filters block size");
    options.addOption(INDEX_BLOCK_SIZE_OPTION, "index_block_size", true,
        "Index block size");

    if (args.length == 0) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp(CreateRandomStoreFile.class.getSimpleName(), options,
          true);
      return false;
    }

    CommandLineParser parser = new PosixParser();
    CommandLine cmdLine;
    try {
      cmdLine = parser.parse(options, args);
    } catch (ParseException ex) {
      LOG.error(ex.toString(), ex);
      return false;
    }

    if (!cmdLine.hasOption(OUTPUT_DIR_OPTION)) {
      LOG.error("Output directory is not specified");
      return false;
    }

    if (!cmdLine.hasOption(NUM_KV_OPTION)) {
      LOG.error("The number of keys/values not specified");
      return false;
    }

    if (!cmdLine.hasOption(KEY_SIZE_OPTION)) {
      LOG.error("Key size is not specified");
      return false;
    }

    if (!cmdLine.hasOption(VALUE_SIZE_OPTION)) {
      LOG.error("Value size not specified");
      return false;
    }

    Configuration conf = HBaseConfiguration.create();

    Path outputDir;
    long numKV;
    Compression.Algorithm compr = Compression.Algorithm.NONE;
    BloomType bloomType = BloomType.NONE;
    int blockSize = HConstants.DEFAULT_BLOCKSIZE;

    // Interpret all option values in one place so that a malformed number or
    // an unknown enum name is reported as a failure (return false), the same
    // way a command line that fails to parse is, instead of escaping as an
    // unchecked exception.
    try {
      outputDir = new Path(cmdLine.getOptionValue(OUTPUT_DIR_OPTION));

      numKV = Long.parseLong(cmdLine.getOptionValue(NUM_KV_OPTION));
      configureKeyValue(numKV,
          Integer.parseInt(cmdLine.getOptionValue(KEY_SIZE_OPTION)),
          Integer.parseInt(cmdLine.getOptionValue(VALUE_SIZE_OPTION)));

      if (cmdLine.hasOption(COMPRESSION_OPTION)) {
        compr = Compression.Algorithm.valueOf(
            cmdLine.getOptionValue(COMPRESSION_OPTION));
      }

      if (cmdLine.hasOption(BLOOM_FILTER_OPTION)) {
        bloomType = BloomType.valueOf(cmdLine.getOptionValue(
            BLOOM_FILTER_OPTION));
      }

      if (cmdLine.hasOption(BLOCK_SIZE_OPTION)) {
        blockSize = Integer.parseInt(cmdLine.getOptionValue(BLOCK_SIZE_OPTION));
      }

      if (cmdLine.hasOption(BLOOM_BLOCK_SIZE_OPTION)) {
        conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
            Integer.parseInt(cmdLine.getOptionValue(BLOOM_BLOCK_SIZE_OPTION)));
      }

      if (cmdLine.hasOption(INDEX_BLOCK_SIZE_OPTION)) {
        conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY,
            Integer.parseInt(cmdLine.getOptionValue(INDEX_BLOCK_SIZE_OPTION)));
      }
    } catch (IllegalArgumentException ex) {
      // NumberFormatException is a subclass, so this also covers bad numbers.
      LOG.error("Invalid option value: {}", ex.getMessage(), ex);
      return false;
    }

    FileSystem fs = FileSystem.get(conf);

    HFileContext meta = new HFileContextBuilder().withCompression(compr)
        .withBlockSize(blockSize).build();
    StoreFileWriter sfw = new StoreFileWriter.Builder(conf,
        new CacheConfig(conf), fs)
            .withOutputDir(outputDir)
            .withBloomType(bloomType)
            .withMaxKeyCount(numKV)
            .withFileContext(meta)
            .build();

    rand = new Random();
    LOG.info("Writing {} key/value pairs", numKV);
    for (long i = 0; i < numKV; ++i) {
      sfw.append(generateKeyValue(i));
    }

    int numMetaBlocks = rand.nextInt(10) + 1;
    LOG.info("Writing {} meta blocks", numMetaBlocks);
    for (int metaI = 0; metaI < numMetaBlocks; ++metaI) {
      sfw.getHFileWriter().appendMetaBlock(generateString(),
          new BytesWritable(generateValue()));
    }
    sfw.close();

    Path storeFilePath = sfw.getPath();
    long fileSize = fs.getFileStatus(storeFilePath).getLen();
    LOG.info("Created {}, {} bytes, compression={}", storeFilePath, fileSize, compr);

    return true;
  }

  /**
   * Derives the key/value layout from the requested sizes.
   *
   * <p>The key prefix length is the number of bytes needed to represent
   * {@code numKV}; the key length is never smaller than that prefix.
   *
   * @param numKV number of key/value pairs that will be generated
   * @param keyLen requested average key length (absolute value is used)
   * @param valueLen requested average value length
   */
  private void configureKeyValue(long numKV, int keyLen, int valueLen) {
    numKV = Math.abs(numKV);
    keyLen = Math.abs(keyLen);
    keyPrefixLen = 0;
    // Count how many bytes are needed to hold numKV. The unsigned shift
    // guarantees termination even if numKV is still negative after abs().
    while (numKV != 0) {
      numKV >>>= 8;
      ++keyPrefixLen;
    }

    this.keyLen = Math.max(keyPrefixLen, keyLen);
    this.valueLen = valueLen;

    // Arbitrarily split the key into row, column family, and qualifier.
    rowLen = keyPrefixLen / 3;
    cfLen = keyPrefixLen / 4;
  }

  /**
   * Returns a random integer in the closed interval {@code [-range, range]}.
   *
   * @param range maximum absolute value of the result
   * @return a uniformly chosen offset
   */
  private int nextInRange(int range) {
    return rand.nextInt(2 * range + 1) - range;
  }

  /**
   * Builds the {@code i}-th random key/value. A single generated key buffer is
   * sliced into row, column family and qualifier using the configured
   * lengths; the timestamp and key type are random.
   *
   * @param i index of the pair, encoded into the key prefix
   * @return the generated key/value
   */
  public KeyValue generateKeyValue(long i) {
    byte[] k = generateKey(i);
    byte[] v = generateValue();

    return new KeyValue(
        k, 0, rowLen,
        k, rowLen, cfLen,
        k, rowLen + cfLen, k.length - rowLen - cfLen,
        rand.nextLong(),
        generateKeyType(rand),
        v, 0, v.length);
  }

  /**
   * Picks a random key type. Half of the time this is
   * {@link KeyValue.Type#Put}; otherwise one of the
   * {@link #NUM_VALID_KEY_TYPES} constants at indexes
   * {@code 1..NUM_VALID_KEY_TYPES} of {@code KeyValue.Type.values()}.
   *
   * @param rand source of randomness
   * @return a key type other than Minimum or Maximum
   * @throws RuntimeException if the chosen constant turns out to be Minimum or
   *         Maximum, which indicates the enum layout has changed
   */
  public static KeyValue.Type generateKeyType(Random rand) {
    if (rand.nextBoolean()) {
      // Let's make half of KVs puts.
      return KeyValue.Type.Put;
    } else {
      KeyValue.Type keyType =
          KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)];
      if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum) {
        throw new RuntimeException("Generated an invalid key type: " + keyType
            + ". " + "Probably the layout of KeyValue.Type has changed.");
      }
      return keyType;
    }
  }

  /**
   * Generates a random string of 0 to 9 upper-case ASCII letters.
   *
   * @return the generated string, possibly empty
   */
  private String generateString() {
    // Draw the length once. Re-rolling the bound inside the loop condition
    // would compare against a different random number on every iteration and
    // skew the result toward very short strings.
    int length = rand.nextInt(10);
    StringBuilder sb = new StringBuilder(length);
    for (int i = 0; i < length; ++i) {
      sb.append((char) ('A' + rand.nextInt(26)));
    }
    return sb.toString();
  }

  /**
   * Generates the key bytes for the {@code i}-th pair: the first
   * {@code keyPrefixLen} bytes hold the low-order bytes of {@code i},
   * most significant first, and the remaining bytes are random. The total
   * length varies around {@code keyLen} by up to {@link #LEN_VARIATION} but is
   * never shorter than the prefix.
   *
   * @param i index of the pair
   * @return the key bytes
   */
  private byte[] generateKey(long i) {
    byte[] k = new byte[Math.max(keyPrefixLen, keyLen
        + nextInRange(LEN_VARIATION))];
    for (int pos = keyPrefixLen - 1; pos >= 0; --pos) {
      k[pos] = (byte) (i & 0xFF);
      i >>>= 8;
    }
    for (int pos = keyPrefixLen; pos < k.length; ++pos) {
      k[pos] = (byte) rand.nextInt(256);
    }
    return k;
  }

  /**
   * Generates a random value whose length varies around {@code valueLen} by up
   * to {@link #LEN_VARIATION} and is at least one byte.
   *
   * @return the value bytes
   */
  private byte[] generateValue() {
    byte[] v = new byte[Math.max(1, valueLen + nextInRange(LEN_VARIATION))];
    for (int i = 0; i < v.length; ++i) {
      v[i] = (byte) rand.nextInt(256);
    }
    return v;
  }

  /**
   * Command-line entry point. Exits with {@link #EXIT_FAILURE} when
   * {@link #run} returns false or throws an {@link IOException}.
   *
   * @param args command-line arguments
   */
  public static void main(String[] args) {
    CreateRandomStoreFile app = new CreateRandomStoreFile();
    try {
      if (!app.run(args)) {
        System.exit(EXIT_FAILURE);
      }
    } catch (IOException ex) {
      LOG.error(ex.toString(), ex);
      System.exit(EXIT_FAILURE);
    }
  }
}
HBase源码实战:CreateRandomStoreFile的更多相关文章
- HBase源码实战:BufferedMutator
/** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agr ...
- HBase源码实战:ImportTsv
/** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agr ...
- Hbase源码分析:Hbase UI中Requests Per Second的具体含义
Hbase源码分析:Hbase UI中Requests Per Second的具体含义 让运维加监控,被问到Requests Per Second(见下图)的具体含义是什么?我一时竟回答不上来,虽然大 ...
- hbase源码系列(十二)Get、Scan在服务端是如何处理?
继上一篇讲了Put和Delete之后,这一篇我们讲Get和Scan, 因为我发现这两个操作几乎是一样的过程,就像之前的Put和Delete一样,上一篇我本来只打算写Put的,结果发现Delete也可以 ...
- hbase源码带注释版本,放在这里,方便大家下载吧
看了5个月的hbase源码,记录了一些笔记,如果有需要的朋友可以拿去. 里面总共包括几个主要的工程吧:hbase-common,hbase-client,hbase-prefix-tree,hbase ...
- HBase源码学习系列
转自:http://www.cnblogs.com/cenyuhai/tag/hbase%E6%BA%90%E7%A0%81%E7%B3%BB%E5%88%97/ (mark) hbase源码系列(十 ...
- Hbase源码分析:RPC概况
RPC是hbase中Master,RegionServer和Client三者之间通信交流的纽带.了解hbase的rpc机制能够为通过源码学习hbase奠定良好的基础.因为了解了hbase的rpc机制能 ...
- 11 hbase源码系列(十一)Put、Delete在服务端是如何处理
hbase源码系列(十一)Put、Delete在服务端是如何处理? 在讲完HFile和HLog之后,今天我想分享的是Put在Region Server经历了些什么?相信前面看了《HTab ...
- hbase源码系列(十二)Get、Scan在服务端是如何处理
hbase源码系列(十二)Get、Scan在服务端是如何处理? 继上一篇讲了Put和Delete之后,这一篇我们讲Get和Scan, 因为我发现这两个操作几乎是一样的过程,就像之前的Put和Del ...
随机推荐
- TypeError: unorderable types: str() >= int()
1.问题描述 age=input('please enter your age') if age >=18: print('your age is',age) print('adult') el ...
- 详解intellij idea搭建SSM框架(spring+maven+mybatis+mysql+junit)(上)
SSM(Spring+SpringMVC+MyBatis)框架集由Spring、SpringMVC、MyBatis三个开源框架整合而成,常作为数据源较简单的web项目的框架. 其中spring是一个轻 ...
- EF架构~FluentValidation实体检验与实体分离了
回到目录 在MVC,EF,LINQ环境里,我们经常会用到DataModel(DO)和ViewModel(VO),可能对于它们的属性校验我们会采用特性的方式,当然这很直观,就连微软的DEMO也是如些,一 ...
- DocX开源WORD操作组件的学习系列四
DocX学习系列 DocX开源WORD操作组件的学习系列一 : http://www.cnblogs.com/zhaojiedi1992/p/zhaojiedi_sharp_001_docx1.htm ...
- Java多线程父子线程关系 多线程中篇(六)
有的时候对于Java多线程,我们会听到“父线程、子线程”的概念。 严格地说,Java中不存在实质上的父子关系 没有方法可以获取一个线程的父线程,也没有方法可以获取一个线程所有的子线程 子线程的消亡与父 ...
- JavaSE之Long 详解 Long的方法简介以及用法
基本功能 Long 类在对象中包装了基本类型 long 的值 每个 Long 类型的对象都包含一个 long 类型的字段 static long MAX_VALUE long 8个字节最大值2^63- ...
- 【细语】C#之扩展方法原理及其使用
1.写在前面 今天群里一个小伙伴问了这样一个问题,扩展方法与实例方法的执行顺序是什么样子的,谁先谁后(这个问题会在文章结尾回答).所以写了这边文章,力图从原理角度解释扩展方法及其使用. 以下为主要内容 ...
- Joda-Time开源库
Joda-Time是一个面向 Java™ 平台的易于使用的开源时间日期库. 依赖 <dependency> <groupId>joda-time</groupId> ...
- Markdown 语法详尽笔记大全 2019
目录 0.介绍 1.快捷键 2.基本语法 2.1 分级标题 # 写法1 # 写法2 上下文标题 2.2 字体设置斜体.粗体.删除线 _*~ 2.3 分割线 --- 或 *** 2.4 引 ...
- C# 判断用户是否对路径拥有访问权限
如何获取当前系统用户对文件/文件夹的操作权限? 1.获取安全信息DirectorySecurity DirectorySecurity fileAcl = Directory.GetAccessCon ...