bucket list 函数解析
cls_bucket_list 函数
librados::IoCtx index_ctx;
// key - oid (for different shards if there is any)
// value - list result for the corresponding oid (shard), it is filled by the AIO callback
map<int, string> oids;
map<int, struct rgw_cls_list_ret> list_results;
int r = open_bucket_index(bucket, index_ctx, oids, shard_id);
if (r < 0)
return r;
cls_rgw_obj_key start_key(start.name, start.instance);
r = CLSRGWIssueBucketList(index_ctx, start_key, prefix, num_entries, list_versions,
oids, list_results, cct->_conf->rgw_bucket_index_max_aio)();
if (r < 0)
return r;
打开桶索引, 将各个shard对应的索引对象名存入oids map中(key为shard编号); 然后对各个shard发起list请求, 每个shard的list结果由AIO回调填入list_results.
// Create a list of iterators that are used to iterate each shard
vector<map<string, struct rgw_bucket_dir_entry>::iterator> vcurrents(list_results.size());
vector<map<string, struct rgw_bucket_dir_entry>::iterator> vends(list_results.size());
vector<string> vnames(list_results.size());
map<int, struct rgw_cls_list_ret>::iterator iter = list_results.begin();
*is_truncated = false;
for (; iter != list_results.end(); ++iter) {
vcurrents.push_back(iter->second.dir.m.begin());
vends.push_back(iter->second.dir.m.end());
vnames.push_back(oids[iter->first]);
*is_truncated = (*is_truncated || iter->second.is_truncated);
}
处理list_results: list_results容器中存放的是桶各个shard的list结果(而不是shard对象本身). 这里为每个shard记录其目录项的起始/结束迭代器以及对应的索引对象名; 只要有任意一个shard的结果被截断, 就将*is_truncated置为true.
// Create a map to track the next candidate entry from each shard, if the entry
// from a specified shard is selected/erased, the next entry from that shard will
// be inserted for next round selection
map<string, size_t> candidates;
for (size_t i = 0; i < vcurrents.size(); ++i) {
if (vcurrents[i] != vends[i]) {
candidates[vcurrents[i]->first] = i;
}
}
创建一个map(candidates), 用于跟踪各个bucket shard的下一个候选条目: key为条目名, value为该条目所在shard的下标. 某个shard的条目被选中并移除后, 再把该shard的下一个条目插入进来参与下一轮选择.
// Select the next one
int pos = candidates.begin()->second;
const string& name = vcurrents[pos]->first;
struct rgw_bucket_dir_entry& dirent = vcurrents[pos]->second;
// fill it in with initial values; we may correct later
RGWObjEnt e;
e.key.set(dirent.key.name, dirent.key.instance);
e.size = dirent.meta.size;
e.accounted_size = dirent.meta.accounted_size;
e.mtime = dirent.meta.mtime;
e.etag = dirent.meta.etag;
e.owner = dirent.meta.owner;
e.owner_display_name = dirent.meta.owner_display_name;
e.content_type = dirent.meta.content_type;
e.tag = dirent.tag;
e.flags = dirent.flags;
e.versioned_epoch = dirent.versioned_epoch;
从candidates中取出第一个(即key最小的)候选条目, 得到它所在的shard下标pos以及对应的目录项dirent, 并用dirent中的值初始化RGWObjEnt e(后面可能还会修正).
bool force_check = force_check_filter && force_check_filter(dirent.key.name);
if ((!dirent.exists && !dirent.is_delete_marker()) || !dirent.pending_map.empty() || force_check) {
/* there are uncommitted ops. We need to check the current state,
* and if the tags are old we need to do cleanup as well. */
librados::IoCtx sub_ctx;
sub_ctx.dup(index_ctx);
r = check_disk_state(sub_ctx, bucket, dirent, e, updates[vnames[pos]]);
if (r < 0 && r != -ENOENT) {
return r;
}
}
判断是否需要检查磁盘状态(check_disk_state)并生成建议更新, 判断条件(满足任意一条即可):
- dirent.exists 为false, 并且该条目不是delete marker.
- dirent.pending_map 不为空, 说明存在尚未提交的操作.
- force_check 为true, 即强制检查.
if (r >= 0) {
ldout(cct, 10) << "RGWRados::cls_bucket_list: got " << e.key.name << "[" << e.key.instance << "]" << dendl;
m[name] = std::move(e);
++count;
}
检查完成后, 如果r >= 0, 则将条目e移动(std::move)到结果表m中, 并将count加1.
// Refresh the candidates map
candidates.erase(candidates.begin());
++vcurrents[pos];
if (vcurrents[pos] != vends[pos]) {
candidates[vcurrents[pos]->first] = pos;
}
刷新候选表candidates: 删除刚处理完的条目, 并把该shard的下一个条目(如果还有)加入候选表, 然后继续解析下一个对象.
// Suggest updates if there is any
map<string, bufferlist>::iterator miter = updates.begin();
for (; miter != updates.end(); ++miter) {
if (miter->second.length()) {
ObjectWriteOperation o;
cls_rgw_suggest_changes(o, miter->second);
// we don't care if we lose suggested updates, send them off blindly
AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
index_ctx.aio_operate(miter->first, c, &o);
c->release();
}
}
把check_disk_state生成的建议更新(updates, 按shard的索引对象名分组)通过异步操作发送给对应的索引对象; 发送后不等待结果, 即使丢失也没有关系. 重点参考 cls_rgw_suggest_changes 函数
// Check if all the returned entries are consumed or not
for (size_t i = 0; i < vcurrents.size(); ++i) {
if (vcurrents[i] != vends[i])
*is_truncated = true;
}
if (!m.empty())
*last_entry = m.rbegin()->first;
最后, 如果还有shard的条目没有被消费完, 则将*is_truncated置为true; 如果m不为空, 则将*last_entry设置为m中最后一个条目的名字.
check_disk_state
函数说明: 检查磁盘上的对象的状态
rgw_obj obj;
std::string oid, instance, loc, ns;
rgw_obj_key key;
key.set(list_state.key);
oid = key.name;
if (!rgw_obj::strip_namespace_from_object(oid, ns, instance)) {
// well crap
assert(0 == "got bad object name off disk");
}
obj.init(bucket, oid);
obj.set_loc(list_state.locator);
obj.set_ns(ns);
obj.set_instance(key.instance);
get_obj_bucket_and_oid_loc(obj, bucket, oid, loc);
io_ctx.locator_set_key(loc);
RGWObjState *astate = NULL;
RGWObjectCtx rctx(this);
int r = get_obj_state(&rctx, obj, &astate, NULL);
if (r < 0)
return r;
list_state.pending_map.clear(); // we don't need this and it inflates size
if (!astate->exists) {
/* object doesn't exist right now -- hopefully because it's
* marked as !exists and got deleted */
if (list_state.exists) {
/* FIXME: what should happen now? Work out if there are any
* non-bad ways this could happen (there probably are, but annoying
* to handle!) */
}
// encode a suggested removal of that key
list_state.ver.epoch = io_ctx.get_last_version();
list_state.ver.pool = io_ctx.get_id();
cls_rgw_encode_suggestion(CEPH_RGW_REMOVE, list_state, suggested_updates);
return -ENOENT;
}
string etag;
string content_type;
ACLOwner owner;
object.size = astate->size;
object.mtime = astate->mtime;
map<string, bufferlist>::iterator iter = astate->attrset.find(RGW_ATTR_ETAG);
if (iter != astate->attrset.end()) {
etag = iter->second.c_str();
}
iter = astate->attrset.find(RGW_ATTR_CONTENT_TYPE);
if (iter != astate->attrset.end()) {
content_type = iter->second.c_str();
}
iter = astate->attrset.find(RGW_ATTR_ACL);
if (iter != astate->attrset.end()) {
r = decode_policy(iter->second, &owner);
if (r < 0) {
dout(0) << "WARNING: could not decode policy for object: " << obj << dendl;
}
}
if (astate->has_manifest) {
RGWObjManifest::obj_iterator miter;
RGWObjManifest& manifest = astate->manifest;
for (miter = manifest.obj_begin(); miter != manifest.obj_end(); ++miter) {
rgw_obj loc = miter.get_location();
if (loc.ns == RGW_OBJ_NS_MULTIPART) {
//dout(10) << "check_disk_state(): removing manifest part from index: " << loc << dendl;
r = delete_obj_index(loc);
if (r < 0) {
dout(0) << "WARNING: delete_obj_index() returned r=" << r << dendl;
}
}
}
}
object.etag = etag;
object.content_type = content_type;
object.owner = owner.get_id();
object.owner_display_name = owner.get_display_name();
// encode suggested updates
list_state.ver.pool = io_ctx.get_id();
list_state.ver.epoch = astate->epoch;
list_state.meta.size = object.size;
list_state.meta.mtime = object.mtime;
list_state.meta.category = main_category;
list_state.meta.etag = etag;
list_state.meta.content_type = content_type;
if (astate->obj_tag.length() > 0)
list_state.tag = astate->obj_tag.c_str();
list_state.meta.owner = owner.get_id().to_str();
list_state.meta.owner_display_name = owner.get_display_name();
list_state.exists = true;
cls_rgw_encode_suggestion(CEPH_RGW_UPDATE, list_state, suggested_updates);
return 0;
cls_rgw_suggest_changes
cls_rgw.cc rgw_dir_suggest_changes
// Header record of a bucket index, read by rgw_dir_suggest_changes() through
// read_bucket_header().
struct rgw_bucket_dir_header {
  // Per-category statistics; indexed with an entry's meta.category.
  map<uint8_t, rgw_bucket_category_stats> stats;

  // Timeout applied to pending_map entries. Zero means "not set": the reader
  // then falls back to CEPH_RGW_TAG_TIMEOUT.
  uint64_t tag_timeout = 0;

  // Index version; stamped onto entries as index_ver when they are updated.
  uint64_t ver = 0;

  // NOTE(review): no writer of master_ver is visible in this excerpt --
  // confirm where (or whether) it is ever assigned.
  uint64_t master_ver = 0;

  // NOTE(review): purpose of max_marker is not shown in this excerpt.
  string max_marker;

  // All numeric fields start at zero via the in-class initializers above.
  rgw_bucket_dir_header() {}
};
Bucket header的结构体:
tag_timeout : pending_map项的时间戳和当前时间相比,相差超过tag_timeout,则删除该pending_map项; 值为0时使用默认值CEPH_RGW_TAG_TIMEOUT.
ver : 每次更新都会增加1
master_ver : ?? 检查代码中只有获取,没有赋值的地方.
max_marker : ??
rgw_bucket_dir_header持久化为omap header
CLS_LOG(1, "rgw_dir_suggest_changes()");
bufferlist header_bl;
struct rgw_bucket_dir_header header;
bool header_changed = false;
int rc = read_bucket_header(hctx, &header);
if (rc < 0) {
CLS_LOG(1, "ERROR: rgw_dir_suggest_changes(): failed to read header\n");
return rc;
}
取出桶的header信息.
timespan tag_timeout(header.tag_timeout ? header.tag_timeout : CEPH_RGW_TAG_TIMEOUT);
计算超时时长.
while (!in_iter.end()) {
__u8 op;
rgw_bucket_dir_entry cur_change;
rgw_bucket_dir_entry cur_disk;
try {
::decode(op, in_iter);
::decode(cur_change, in_iter);
} catch (buffer::error& err) {
CLS_LOG(1, "ERROR: rgw_dir_suggest_changes(): failed to decode request\n");
return -EINVAL;
}
//decode dir_key
bufferlist cur_disk_bl;
string cur_change_key;
encode_obj_index_key(cur_change.key, &cur_change_key);
int ret = cls_cxx_map_get_val(hctx, cur_change_key, &cur_disk_bl);
if (ret < 0 && ret != -ENOENT)
return -EINVAL;
//获取osd中对象信息
if (cur_disk_bl.length()) {
bufferlist::iterator cur_disk_iter = cur_disk_bl.begin();
try {
::decode(cur_disk, cur_disk_iter);
} catch (buffer::error& error) {
CLS_LOG(1, "ERROR: rgw_dir_suggest_changes(): failed to decode cur_disk\n");
return -EINVAL;
}
real_time cur_time = real_clock::now();
map<string, struct rgw_bucket_pending_info>::iterator iter =
cur_disk.pending_map.begin();
while(iter != cur_disk.pending_map.end()) {
map<string, struct rgw_bucket_pending_info>::iterator cur_iter=iter++;
if (cur_time > (cur_iter->second.timestamp + tag_timeout)) {
cur_disk.pending_map.erase(cur_iter);
}
//如果某个pending项已经超时, 则把这一项从pending_map中删除(不是删除整个pending_map), 这个可能是安全性的检查.
}
}
CLS_LOG(20, "cur_disk.pending_map.empty()=%d op=%d cur_disk.exists=%d cur_change.pending_map.size()=%d cur_change.exists=%d\n",
cur_disk.pending_map.empty(), (int)op, cur_disk.exists,
(int)cur_change.pending_map.size(), cur_change.exists);
if (cur_disk.pending_map.empty()) {
if (cur_disk.exists) {
struct rgw_bucket_category_stats& old_stats = header.stats[cur_disk.meta.category];
CLS_LOG(10, "total_entries: %" PRId64 " -> %" PRId64 "\n", old_stats.num_entries, old_stats.num_entries - 1);
old_stats.num_entries--;
old_stats.total_size -= cur_disk.meta.accounted_size;
old_stats.total_size_rounded -= get_rounded_size(cur_disk.meta.accounted_size);
header_changed = true;
}
struct rgw_bucket_category_stats& stats =
header.stats[cur_change.meta.category];
switch(op) {
case CEPH_RGW_REMOVE:
CLS_LOG(10, "CEPH_RGW_REMOVE name=%s instance=%s\n", cur_change.key.name.c_str(), cur_change.key.instance.c_str());
ret = cls_cxx_map_remove_key(hctx, cur_change_key);
if (ret < 0)
return ret;
break;
case CEPH_RGW_UPDATE:
CLS_LOG(10, "CEPH_RGW_UPDATE name=%s instance=%s total_entries: %" PRId64 " -> %" PRId64 "\n",
cur_change.key.name.c_str(), cur_change.key.instance.c_str(), stats.num_entries, stats.num_entries + 1);
//统计更新
stats.num_entries++;
stats.total_size += cur_change.meta.accounted_size;
stats.total_size_rounded += get_rounded_size(cur_change.meta.accounted_size);
header_changed = true;
cur_change.index_ver = header.ver;
bufferlist cur_state_bl;
::encode(cur_change, cur_state_bl);
ret = cls_cxx_map_set_val(hctx, cur_change_key, &cur_state_bl);
if (ret < 0)
return ret;
break;
}
}
}
bucket list 函数解析的更多相关文章
- [转]javascript eval函数解析json数据时为什加上圆括号eval("("+data+")")
javascript eval函数解析json数据时为什么 加上圆括号?为什么要 eval这里要添加 “("("+data+")");//”呢? 原因在于: ...
- PHP json_decode 函数解析 json 结果为 NULL 的解决方法
在做网站 CMS 模块时,对于模块内容 content 字段,保存的是 json 格式的字符串,所以在后台进行模块内容的编辑操作 ( 取出保存的数据 ) 时,需要用到 json_decode() 函数 ...
- Matlab中bsxfun和unique函数解析
一.问题来源 来自于一份LSH代码,记录下来. 二.函数解析 2.1 bsxfun bsxfun是一个matlab自版本R2007a来就提供的一个函数,作用是”applies an element-b ...
- socket使用TCP协议时,send、recv函数解析以及TCP连接关闭的问题
Tcp协议本身是可靠的,并不等于应用程序用tcp发送数据就一定是可靠的.不管是否阻塞,send发送的大小,并不代表对端recv到多少的数据. 在阻塞模式下, send函数的过程是将应用程序请求发送的数 ...
- sigaction函数解析
http://blog.chinaunix.net/uid-1877180-id-3011232.html sigaction函数解析 sigaction函数的功能是检查或修改与指定信号相关联的处理 ...
- driver_register()函数解析
driver_register()函数解析 /** * driver_register - register driver with bus * @drv: driver to register * ...
- async函数解析
转载请注明出处:async函数解析 async函数是基于Generator函数实现的,也就是说是Generator函数的语法糖.在之前的文章有介绍过Generator函数语法和异步应用,如果对其不了解 ...
- tf.train.shuffle_batch函数解析
tf.train.shuffle_batch (tensor_list, batch_size, capacity, min_after_dequeue, num_threads=1, seed=No ...
- oracle中next_day()、last_day()函数解析
oracle中next_day()函数解析 Sql代码 当前系统时间的下一星期一的时间select next_day(sysdate,1) from dual NEXT_DAY(date,char ...
随机推荐
- pytest 安装和入门
安装pytest 1.在命令行中运行以下命令: pip install -U pytest 2.检查已经安装的版本: pytest --version This is pytest version 3 ...
- mysql 正确清理binlog日志的两种方法
前言: MySQL中的binlog日志记录了数据库中数据的变动,便于对数据的基于时间点和基于位置的恢复,但是binlog也会日渐增大,占用很大的磁盘空间,因此,要对binlog使用正确安全的方法清理掉 ...
- Delphi7中Unicode,ANSI,UTF编码问题
注解: ANSI 'American Standard Code for Information Interchange' 美国信息互换标准代码 ANSI的'Ascii'编码 Unicode ...
- PLC_SIM 出现I/O访问错误-技术论坛-工业支持中心-西门子中国
PLC_SIM 作为SIEMENS S7-300/400 系列PLC 的仿真软件,在使用时需要有些注意事项,毕竟任何的仿真软件和真正的设备还是有一定差异的,由此而产生的误会经常会令很多客户摸不着头脑, ...
- MySql中的SHOW INDEX 查出的结果列代表的意义
MySQL SHOW INDEX语法的实际操作用法以及其实际查看索引状态(语法)的具体内容的描述,如果你对这一技术,心存好奇的话,以下的文章将会揭开它的神秘面纱. INDEX FROM tbl_nam ...
- Zeppelin 0.6.2使用Spark的yarn-client模式
Zeppelin版本0.6.2 1. Export SPARK_HOME In conf/zeppelin-env.sh, export SPARK_HOME environment variable ...
- 【排序函数讲解】sort-C++
c++标准库里的排序函数,用于对给定区间所有元素进行排序.头 文件是#include 使用 Sort()在具体实现中规避了经典快速排序可能出现的.会导 致实际复杂度退化到 o(n²)的极端情况.它根据 ...
- pytorch实现yolov3(5) 实现端到端的目标检测
torch实现yolov3(1) torch实现yolov3(2) torch实现yolov3(3) torch实现yolov3(4) 前面4篇已经实现了network的forward,并且将netw ...
- 实验吧--web--天下武功唯快不破
---恢复内容开始--- 英文翻译过来嘛,就是:天下武功无快不破嘛.(出题者还是挺切题的) 看看前端源码: 注意这里 please post what you find with parameter: ...
- js的事件冒泡机制
js的事件冒泡机制呢,就是一个DOM树,一级一级向上冒的过程,最终是到document这个根节点这里.js的事件冒泡机制,就像是一个水泡在水底下,冒泡到水面的过程. 摘自醉清玄