通过Python操作hbase api
# coding=utf-8
# Author: ruin
"""
discrible: """
from thrift.transport import TSocket
from thrift.protocol import TBinaryProtocol
from thrift.transport import TTransport
from hbase import Hbase import struct # Method for encoding ints with Thrift's string encoding
def encode(n):
    """Serialize the integer ``n`` to bytes with ``struct``.

    The value is packed with the ``"i"`` format (a C ``int`` in native
    byte order), which is the form ``decode`` unpacks again.

    :param n: integer to serialize
    :return: the packed bytes
    """
    int_format = "i"
    packed = struct.pack(int_format, n)
    return packed


# Method for decoding ints with Thrift's string encoding
def decode(s):
    """Turn a stored cell value back into an integer.

    A value made up only of digits is parsed as decimal text; anything
    else is unpacked with the ``"i"`` struct format (a C ``int`` in
    native byte order), so it must have exactly that size.

    :param s: digit string/bytes, or bytes produced by ``struct.pack("i", n)``
    :return: the decoded integer
    """
    if not s.isdigit():
        (number,) = struct.unpack('i', s)
        return number
    return int(s)
class HBaseApi(object):
    """Convenience wrapper around the HBase Thrift client for one table.

    On construction it connects to the Thrift server, records the column
    families to use and creates the table when it does not exist yet.
    All read/write helpers work on the first configured column family.
    """

    def __init__(self, table='fr_test_hbase:test_api', host='10.2.46.240', port=9090):
        """Connect to the Thrift server and make sure ``table`` exists.

        :param table: table name (``namespace:table``), str or bytes
        :param host: Thrift server host
        :param port: Thrift server port
        """
        # Defined before any I/O so close()/__del__ stay safe if connecting fails.
        self.transport = None
        self.table = self._to_bytes(table)
        self.host = host
        self.port = port
        # Connect to HBase Thrift server
        self.transport = TTransport.TBufferedTransport(TSocket.TSocket(host, port))
        self.protocol = TBinaryProtocol.TBinaryProtocolAccelerated(self.transport)
        # Create and open the client connection
        self.client = Hbase.Client(self.protocol)
        self.transport.open()
        # set type and field of column families
        self.set_column_families([bytes], ['info'])
        self._build_column_families()

    @staticmethod
    def _to_bytes(text):
        """Return ``text`` as UTF-8 bytes; bytes input is returned unchanged."""
        if isinstance(text, bytes):
            return text
        return text.encode('utf-8')

    @staticmethod
    def _encode_value(value):
        """Serialize a cell value: bytes as-is, str as UTF-8, int via ``encode``.

        :raises TypeError: for any other value type
        """
        if isinstance(value, bytes):
            return value
        if isinstance(value, str):
            return value.encode('utf-8')
        if isinstance(value, int):
            return encode(value)
        raise TypeError(
            'unsupported value type for HBase cell: {0}'.format(type(value).__name__)
        )

    def _column_name(self, qualifier):
        """Return ``b'<first column family>:<qualifier>'``."""
        return self._to_bytes(self.columnFamilies[0] + ':' + qualifier)

    def _build_mutation(self, qualifier, value):
        """Build the ``Hbase.Mutation`` that writes ``value`` to ``qualifier``."""
        return Hbase.Mutation(
            column=self._column_name(qualifier),
            value=self._encode_value(value),
        )

    def _result_to_dict(self, result, qualifier):
        """Convert one row result to ``{row_key: value}`` (both UTF-8 decoded).

        Returns an empty dict when the row has no cell for ``qualifier``.
        """
        cell = result.columns.get(self._column_name(qualifier))
        if cell is None:
            return {}
        return {result.row.decode('utf-8'): cell.value.decode('utf-8')}

    def set_column_families(self, type_list, col_list=None):
        """Record the column family names and their value types.

        :param type_list: value types, one per column family
        :param col_list: column family names; defaults to ``['info']``
        """
        self.columnFamiliesType = type_list
        # A fresh list per call: a shared mutable default would leak
        # modifications from one instance into every other one.
        self.columnFamilies = ['info'] if col_list is None else col_list

    def _build_column_families(self):
        """Create the table with the configured column families if it is missing."""
        tables = self.client.getTableNames()
        if self.table not in tables:
            self.__create_table(self.table)

    def __create_table(self, table):
        """Create ``table`` in HBase with the configured column families.

        :param table: table name such as ``fr_test_hbase:fr_test`` (str or bytes)
        """
        # Names are sent as bytes, like the table/row/column names used by
        # the other calls in this class.
        columnFamilies = [
            Hbase.ColumnDescriptor(name=self._to_bytes(columnFamily))
            for columnFamily in self.columnFamilies
        ]
        # ``table`` normally arrives already encoded (self.table), so it must
        # not be blindly ``.encode``d a second time.
        self.client.createTable(self._to_bytes(table), columnFamilies)

    def close(self):
        """Close the Thrift transport; safe to call more than once."""
        transport = getattr(self, 'transport', None)
        if transport is not None:
            transport.close()
            self.transport = None

    def __del__(self):
        self.close()

    def __del_table(self, table):
        """Delete a table; it has to be disabled first."""
        table = self._to_bytes(table)
        self.client.disableTable(table)
        self.client.deleteTable(table)

    def getColumnDescriptors(self):
        """Return the column descriptors of the wrapped table."""
        return self.client.getColumnDescriptors(self.table)

    def put(self, rowKey, qualifier, value):
        """Write one cell into the first column family.

        :param rowKey: row key (str or bytes)
        :param qualifier: column qualifier inside the column family
        :param value: cell value; str is stored as UTF-8, int via ``encode``,
            bytes unchanged
        :raises TypeError: if ``value`` has an unsupported type
        :description: HBaseApi(table).put('rowKey', 'column', 'value')
        """
        mutations = [self._build_mutation(qualifier, value)]
        self.client.mutateRow(self.table, self._to_bytes(rowKey), mutations, {})

    def puts(self, rowKeys, qualifier, values):
        """Write one cell per row, all under the same ``qualifier``.

        :param rowKeys: list of row keys, one per value, or a single row key
            (str/bytes) that is reused for every value
        :param qualifier: column qualifier inside the first column family
        :param values: list of cell values (str, int or bytes)
        :raises ValueError: if there are fewer row keys than values
        :raises TypeError: if a value has an unsupported type

        Usage::

            HBaseApi('table').puts(rowKeys=['1', '2', '3'], qualifier='name',
                                   values=['a', 'b', 'c'])
        """
        values = list(values)
        if isinstance(rowKeys, (str, bytes)):
            rowKeys = [rowKeys] * len(values)
        else:
            rowKeys = list(rowKeys)
        if len(rowKeys) < len(values):
            raise ValueError(
                'got {0} row keys for {1} values'.format(len(rowKeys), len(values))
            )
        mutationsBatch = [
            Hbase.BatchMutation(
                row=self._to_bytes(rowKey),
                mutations=[self._build_mutation(qualifier, value)],
            )
            for rowKey, value in zip(rowKeys, values)
        ]
        if mutationsBatch:
            self.client.mutateRows(self.table, mutationsBatch, {})

    def getRow(self, row, qualifier='name'):
        """Read one cell of one row.

        :param row: row key (str or bytes)
        :param qualifier: column qualifier inside the first column family
        :return: ``{row_key: value}`` with both decoded as UTF-8, or an empty
            dict when the row or the column does not exist
        """
        results = self.client.getRow(self.table, self._to_bytes(row), {})
        found = {}
        for result in results:
            found.update(self._result_to_dict(result, qualifier))
        return found

    def getRows(self, rows, qualifier='name'):
        """Read the same ``qualifier`` from several rows.

        :param rows: iterable of row keys
        :param qualifier: column qualifier inside the first column family
        :return: list with one ``getRow`` result per requested row key
        """
        return [self.getRow(rowKey, qualifier) for rowKey in rows]

    def scanner(self, numRows=100, startRow=None, stopRow=None, qualifier='name'):
        """Scan the table and return up to ``numRows`` rows.

        :param numRows: maximum number of rows to fetch
        :param startRow: first row key of the scan, or None
        :param stopRow: row key at which the scan stops, or None
        :param qualifier: column qualifier inside the first column family
        :return: list of ``{row_key: value}`` dicts (UTF-8 decoded); rows
            without the requested column are left out
        """
        scan = Hbase.TScan(
            None if startRow is None else self._to_bytes(startRow),
            None if stopRow is None else self._to_bytes(stopRow),
        )
        scannerId = self.client.scannerOpenWithScan(self.table, scan, {})
        try:
            rowList = self.client.scannerGetList(scannerId, numRows)
        finally:
            # Always release the scanner that was opened on the server.
            self.client.scannerClose(scannerId)
        ret = []
        for result in rowList:
            rd = self._result_to_dict(result, qualifier)
            if rd:
                ret.append(rd)
        return ret


def demo():
    """Write nine sample rows into a test table and print a scan of it."""
    ha = HBaseApi('fr_test_hbase:test_log1')
    rowKeys = [str(key) for key in range(10001, 10010)]
    values = ['fr' + str(val) for val in range(10001, 10010)]
    ha.puts(rowKeys, 'name', values)
    print(ha.scanner())


if __name__ == "__main__":
    demo()
通过Python操作hbase api的更多相关文章
- python 操作 hbase
python 是万能的,当然也可以通过api去操作big database 的hbase了,python是通过thrift去访问操作hbase 以下是在centos7 上安装操作,前提是hbase已经 ...
- 【Hbase三】Java,python操作Hbase
Java,python操作Hbase 操作Hbase python操作Hbase 安装Thrift之前所需准备 安装Thrift 产生针对Python的Hbase的API 启动Thrift服务 执行p ...
- 使用IDEA操作Hbase API 报错:org.apache.hadoop.hbase.client.RetriesExhaustedException的解决方法:
使用IDEA操作Hbase API 报错:org.apache.hadoop.hbase.client.RetriesExhaustedException的解决方法: 1.错误详情: Excepti ...
- Hbase理论&&hbase shell&&python操作hbase&&python通过mapreduce操作hbase
一.Hbase搭建: 二.理论知识介绍: 1Hbase介绍: Hbase是分布式.面向列的开源数据库(其实准确的说是面向列族).HDFS为Hbase提供可靠的底层数据存储服务,MapReduce为Hb ...
- Python操作HBase之happybase
安装Thrift 安装Thrift的具体操作,请点击链接 pip install thrift 安装happybase pip install happybase 连接(happybase.Conne ...
- python操作Hbase
本地操作 启动thrift服务:./bin/hbase-daemon.sh start thrift hbase模块产生: 下载thrfit源码包:thrift-0.8.0.tar.gz 解压安装 . ...
- python 操作Hbase 详解
博文参考:https://www.cnblogs.com/tashanzhishi/p/10917956.html 如果你们学习过Python,可以用Python来对Hbase进行操作. happyb ...
- python操作ansible api示例
#!/usr/bin/env python # -*- coding:utf-8 -*- import json import shutil from collections import named ...
- Python 操作 GA API 指南
因为需要写一个 Blog Feature 的缘故,所以接触了下 GA 的 Python API,发现 G 家的 API 不是那么直观,比较绕,但是,在使用过程中发现其实 G 家的 API 设计挺有意思 ...
随机推荐
- filebeat.service
# # filebeat systemd service # [Unit] Description=Filebeat Documentation=https://www.elastic.co/guid ...
- hive 用户订单行为 基础操作
今天用hive查询用户日志表.这是日志表的格式: user_id,item_id,cat_id,merchant_id,brand_id,month,day,action,age_range,gend ...
- linux 使用fdisk分区扩容,看介绍命令(未完)
https://www.cnblogs.com/chenmh/p/5096592.html LVM 逻辑磁盘的一些命令 http://man.linuxde.net/vgcreate
- 项目实践中--Git服务器的搭建与使用指南
一.前言 Git是一款免费.开源的分布式版本控制系统,用以有效.高速的处理从很小到非常大的项目版本管理.在平时的项目开发中,我们会使用到Git来进行版本控制. Git的功能特性: 从一般开发者的角度来 ...
- 网页webbrowser
http://www.codeproject.com/Articles/50544/Using-the-WebBrowser-Control-in-ASP-NET/
- 解决双系统开机no such device:
问题描述: 我的电脑本来是Ubuntu+win7双系统,自己前天想换成win64位,于是就安装系统,结果装好了之后开机进入grub选择win7之后,屏幕显示 no such device: press ...
- 使用pycharm手动搭建python语言django开发环境(三) 使用django的apps应用 添加应用静态文件
1)在django 工程目录中使用cmd命令行 敲入"python manage.py startapp app名称(例子为blog)" 2)在django工程目录中应该生成了bl ...
- 集合Map多对多映射(使用xml文件)
我们可以使用set,bag,map等来映射多对多关系.在这里,我们将使用map来进行多对多映射. 在这种情况下,将创建三个表. 多对多映射示例 我们需要创建以下文件来映射map元素.首先创建一个项目: ...
- iOS 实现从后台切换到前台-复制分享宝贝内容,打开淘宝APP,自动弹出宝贝提示信息
- (void)applicationDidBecomeActive:(UIApplication *)application { NSLog(@"\n ===> 程序重新激活 !" ...
- 网页或WEB应用或PC端浏览器调用百度地图API
今天在写微网页中遇见了调用百度地图这个问题:在一个容器中显示地图信息如图(设计图截图) 然后在网上查了接口:http://api.map.baidu.com/,就是这个东东,当然不止这个,还有几个必选 ...