Riak VClock

关于向量时钟的概念,在这里就不多讲了,大家可以参照Dynamo的论文了解一下。向量时钟在分布式系统中主要用于解决一致性问题,可以和CRDTs一起看。

下面的源码参照自Riak,只是把它翻译成了Elixir,逻辑基本不变。

时钟主要出现的情况有网络分区和并行更新。

剪枝只会丢掉一些向量时钟的信息,即数据更新过程的信息,但不会丢掉实实在在的数据。只有一种情况会有问题:一个client保存了一个很久之前的向量时钟,然后基于这个向量时钟提交了一个数据,此时就会有冲突。因为服务器这边可能已经没有这个很久之前的向量时钟信息了(已经被剪枝掉了),所以client此次提交的数据在服务端无法找到祖先,此时就会创建一个sibling。

所以这个剪枝策略是一个权衡(tradeoff):一方面是向量时钟空间的无限增长,另一方面是偶尔会出现"false merge"。但可以肯定的是,不会悄无声息地丢数据。综上,为了防止向量时钟空间无限增长,剪枝还是比用server标识向量时钟工作得更好。
  • 结构:

向量时钟的每个条目是一个元组 {node, {opCount, TS}},其中的三个值分别为节点(协调器)、操作计数和操作时间。

  • 基本的方法:

merge(合并):

合并的规则是 opCount 优先于 TS:当节点相同时,谁的opCount大谁赢;如果opCount一样,谁的时间戳大谁赢。

@doc """
Combines all vclocks in the input list into their least possible common descendant.

The argument is a list of vclocks, each vclock being a list of
`{node, {counter, timestamp}}` entries, e.g.
`[[{:a, {1, 233}}, {:b, {3, 124}}], [{:a, {2, 300}}]]`.

For a node present in several clocks the entry with the highest counter wins;
when the counters are equal the larger timestamp is kept. A list holding a
single vclock is returned as-is (it is not re-sorted).
"""
@spec merge([list]) :: list
def merge([]), do: []
def merge([single_vclock]), do: single_vclock
def merge([first | rest]), do: merge(rest, List.keysort(first, 0))

# merge/2 folds the remaining clocks, one at a time, into the accumulated
# (already node-sorted) clock `nclock`.
@spec merge([list], list) :: list
def merge([], nclock), do: nclock

def merge([aclock | vclocks], nclock) do
  merge(vclocks, merge(List.keysort(aclock, 0), nclock, []))
end

# merge/3 is a sorted-list merge of two node-sorted clocks. `acc` is built in
# reverse order and reversed once an input runs out.
@spec merge(list, list, list) :: list
def merge([], [], acc), do: :lists.reverse(acc)
def merge([], left, acc), do: :lists.reverse(acc, left)
def merge(left, [], acc), do: :lists.reverse(acc, left)

# Node only in the first clock (so far): emit it and advance the first clock.
def merge([{node1, {_, _}} = entry | vclock], [{node2, {_, _}} | _] = n, acc)
    when node1 < node2 do
  merge(vclock, n, [entry | acc])
end

# Node only in the second clock (so far): emit it and advance the second clock.
def merge([{node1, {_, _}} | _] = v, [{node2, {_, _}} = entry | nclock], acc)
    when node1 > node2 do
  merge(v, nclock, [entry | acc])
end

# Same node in both clocks: highest counter wins, ties keep the newest timestamp.
def merge([{node1, {ctr1, ts1} = ct1} | vclock], [{_node2, {ctr2, ts2} = ct2} | nclock], acc) do
  winner =
    cond do
      ctr1 > ctr2 -> ct1
      ctr1 < ctr2 -> ct2
      true -> {ctr1, max(ts1, ts2)}
    end

  merge(vclock, nclock, [{node1, winner} | acc])
end

prune(裁剪):

裁剪的法则主要是空间时间双方面.

![](../pic/riak_4.png)

最终的裁剪函数是 prune_vclock1(v, now, bprops, headtime)。

@doc """
Possibly shrinks a vclock, depending on its current age and size.

`v` is a list of `{node, {counter, timestamp}}` entries, `now` is the current
time in the same unit as the entry timestamps, and `bucketprops` is a list of
`{key, value}` pairs read through `get_property/2` for the keys
`:small_vclock`, `:young_vclock`, `:big_vclock` and `:old_vclock`.

Entries are dropped oldest-first. Whatever survives is returned ordered by
`{timestamp, node}`.
"""
@spec prune(v :: list, now :: integer, bucketprops :: any) :: list
def prune(v, now, bucketprops) do
  # The sort must be deterministic to avoid spurious merge conflicts later,
  # so the node id is used as a secondary key after the timestamp.
  oldest_first =
    :lists.sort(fn {n1, {_, t1}}, {n2, {_, t2}} -> {t1, n1} < {t2, n2} end, v)

  prune_vclock1(oldest_first, now, bucketprops)
end

# Keeps the clock untouched when it is small enough or when its oldest entry is
# still young; otherwise defers to prune_vclock1/4 to decide whether to drop
# the oldest entry.
def prune_vclock1(v, now, bprops) do
  if get_property(:small_vclock, bprops) >= length(v) do
    v
  else
    {_node, {_counter, headtime}} = hd(v)

    if now - headtime < get_property(:young_vclock, bprops) do
      v
    else
      prune_vclock1(v, now, bprops, headtime)
    end
  end
end

# Precondition: `v` is longer than :small_vclock and its head is older than
# :young_vclock. Drops the head (oldest entry) when the clock is too big or
# the head is too old, then re-evaluates the remainder.
def prune_vclock1(v, now, bprops, headtime) do
  too_big? = length(v) > get_property(:big_vclock, bprops)
  too_old? = now - headtime > get_property(:old_vclock, bprops)

  if too_big? or too_old? do
    prune_vclock1(tl(v), now, bprops)
  else
    v
  end
end

# Looks up `key` in a list of `{key, value}` pairs; returns `:undefined` when
# the key is absent.
def get_property(key, pairlist) do
  case :lists.keyfind(key, 1, pairlist) do
    {_key, value} -> value
    false -> :undefined
  end
end

  • source
defmodule VClock do
  @moduledoc """
  A simple vector clock implementation, ported from Riak's `vclock` module.

  A vclock is a list of `{node, {counter, timestamp}}` entries: `node`
  identifies the actor that performed an update, `counter` is the number of
  updates that actor has made, and `timestamp` is the time of its latest update.
  """
  @vsn 0.1

  @type va :: list()

  # Offset between the Gregorian calendar base and the Unix epoch (1970-01-01),
  # used by timestamp/0 to turn OS time into Gregorian seconds.
  @days_from_gregorian_base_to_epoch (1970 * 365 + 478)
  @seconds_from_gregorian_base_to_epoch (@days_from_gregorian_base_to_epoch * 24 * 60 * 60)

  @doc """
  Creates a brand new, empty vclock.
  """
  @spec fresh() :: []
  def fresh, do: []

  @doc """
  Returns `true` if `va` is a direct descendant of `vb`, else `false`.

  Every node listed in `vb` must be present in `va` with a counter that is at
  least as large. A vclock is its own descendant, and every vclock descends
  from the empty vclock.
  """
  @spec descends(any, any) :: boolean
  def descends(_va, []), do: true

  def descends(va, [{node_b, {ctr_b, _ts_b}} | rest_b]) do
    case List.keyfind(va, node_b, 0) do
      nil -> false
      {_node, {ctr_a, _ts_a}} -> ctr_a >= ctr_b and descends(va, rest_b)
    end
  end

  @doc """
  Combines all vclocks in the input list into their least possible common descendant.

  The argument is a list of vclocks, e.g.
  `[[{:a, {1, 233}}, {:b, {3, 124}}], [{:a, {2, 300}}]]`. For a node present in
  several clocks the entry with the highest counter wins; when the counters are
  equal the larger timestamp is kept. A list holding a single vclock is
  returned as-is (it is not re-sorted).
  """
  @spec merge([list]) :: list
  def merge([]), do: []
  def merge([single_vclock]), do: single_vclock
  def merge([first | rest]), do: merge(rest, List.keysort(first, 0))

  @doc """
  Folds each vclock of the first argument into the node-sorted clock `nclock`.
  """
  @spec merge([list], list) :: list
  def merge([], nclock), do: nclock

  def merge([aclock | vclocks], nclock) do
    merge(vclocks, merge(List.keysort(aclock, 0), nclock, []))
  end

  @doc """
  Merges two node-sorted vclocks; `acc` collects the result in reverse order.
  """
  @spec merge(list, list, list) :: list
  def merge([], [], acc), do: :lists.reverse(acc)
  def merge([], left, acc), do: :lists.reverse(acc, left)
  def merge(left, [], acc), do: :lists.reverse(acc, left)

  # Node only in the first clock (so far): emit it and advance the first clock.
  def merge([{node1, {_, _}} = entry | vclock], [{node2, {_, _}} | _] = n, acc)
      when node1 < node2 do
    merge(vclock, n, [entry | acc])
  end

  # Node only in the second clock (so far): emit it and advance the second clock.
  def merge([{node1, {_, _}} | _] = v, [{node2, {_, _}} = entry | nclock], acc)
      when node1 > node2 do
    merge(v, nclock, [entry | acc])
  end

  # Same node in both clocks: highest counter wins, ties keep the newest timestamp.
  def merge([{node1, {ctr1, ts1} = ct1} | vclock], [{_node2, {ctr2, ts2} = ct2} | nclock], acc) do
    winner =
      cond do
        ctr1 > ctr2 -> ct1
        ctr1 < ctr2 -> ct2
        true -> {ctr1, max(ts1, ts2)}
      end

    merge(vclock, nclock, [{node1, winner} | acc])
  end

  @doc """
  Gets the counter value recorded for `node` in `vclock`.

  Returns `:undefined` when `node` has no entry.
  """
  @spec get_counter(node :: atom, vclock :: list) :: integer | :undefined
  def get_counter(node, vclock) do
    case List.keyfind(vclock, node, 0) do
      {_node, {counter, _ts}} -> counter
      nil -> :undefined
    end
  end

  @doc """
  Gets the timestamp value recorded for `node` in `vclock`.

  Returns `:undefined` when `node` has no entry.
  """
  @spec get_timestamp(node :: atom, vclock :: list) :: integer | :undefined
  def get_timestamp(node, vclock) do
    case List.keyfind(vclock, node, 0) do
      {_node, {_counter, ts}} -> ts
      nil -> :undefined
    end
  end

  @doc """
  Increments the entry for `node` in `vclock`, stamping it with `timestamp/0`.
  """
  @spec increment(atom, list) :: list
  def increment(node, vclock), do: increment(node, timestamp(), vclock)

  @doc """
  Increments the entry for `node` in `vclock`, stamping it with `incts`.

  A node without an entry starts at counter 1. The updated entry is placed at
  the head of the returned vclock.
  """
  @spec increment(atom, integer, list) :: list
  def increment(node, incts, vclock) do
    {counter, others} =
      case List.keytake(vclock, node, 0) do
        nil -> {1, vclock}
        {{_node, {c, _ts}}, rest} -> {c + 1, rest}
      end

    [{node, {counter, incts}} | others]
  end

  @doc """
  Returns the list of all nodes that have ever incremented `vclock`.
  """
  @spec all_nodes(vclock :: list) :: list
  def all_nodes(vclock) do
    Enum.map(vclock, fn {node, {_counter, _ts}} -> node end)
  end

  @doc """
  Returns the current OS time as a number of Gregorian seconds.
  """
  @spec timestamp() :: integer
  def timestamp do
    {megaseconds, seconds, _microseconds} = :os.timestamp()
    @seconds_from_gregorian_base_to_epoch + megaseconds * 1_000_000 + seconds
  end

  @doc """
  Compares two vclocks for equality, ignoring the order of their entries.
  """
  @spec equal(va :: list, vb :: list) :: boolean
  def equal(va, vb), do: Enum.sort(va) === Enum.sort(vb)

  @doc """
  Possibly shrinks a vclock, depending on its current age and size.

  `bucketprops` is a list of `{key, value}` pairs read through
  `get_property/2` for the keys `:small_vclock`, `:young_vclock`,
  `:big_vclock` and `:old_vclock`. Entries are dropped oldest-first. Whatever
  survives is returned ordered by `{timestamp, node}`.
  """
  @spec prune(v :: list, now :: integer, bucketprops :: any) :: list
  def prune(v, now, bucketprops) do
    # The sort must be deterministic to avoid spurious merge conflicts later,
    # so the node id is used as a secondary key after the timestamp.
    oldest_first =
      :lists.sort(fn {n1, {_, t1}}, {n2, {_, t2}} -> {t1, n1} < {t2, n2} end, v)

    prune_vclock1(oldest_first, now, bucketprops)
  end

  @doc """
  Keeps `v` untouched when it is small enough or its oldest entry is still
  young; otherwise defers to `prune_vclock1/4`.
  """
  def prune_vclock1(v, now, bprops) do
    if get_property(:small_vclock, bprops) >= length(v) do
      v
    else
      {_node, {_counter, headtime}} = hd(v)

      if now - headtime < get_property(:young_vclock, bprops) do
        v
      else
        prune_vclock1(v, now, bprops, headtime)
      end
    end
  end

  @doc """
  Drops the head (oldest entry) of `v` when the clock is too big or the head
  is too old, then re-evaluates the remainder.

  Precondition: `v` is longer than `:small_vclock` and its head is older than
  `:young_vclock`.
  """
  def prune_vclock1(v, now, bprops, headtime) do
    too_big? = length(v) > get_property(:big_vclock, bprops)
    too_old? = now - headtime > get_property(:old_vclock, bprops)

    if too_big? or too_old? do
      prune_vclock1(tl(v), now, bprops)
    else
      v
    end
  end

  @doc """
  Looks up `key` in a list of `{key, value}` pairs.

  Returns `:undefined` when the key is absent.
  """
  def get_property(key, pairlist) do
    case :lists.keyfind(key, 1, pairlist) do
      {_key, value} -> value
      false -> :undefined
    end
  end
end

Riak VClock的更多相关文章

  1. Linux/centos下安装riak

    必备的组件: gccgcc-c++glibc-develmakepam-devel 使用yum安装相关组件 sudo yum install gcc gcc-c++ glibc-devel make ...

  2. 向量时钟Vector Clock in Riak

    Riak 是以 Erlang 编写的一个高度可扩展的分布式数据存储,Riak的实现是基于Amazon的Dynamo论文,Riak的设计目标之一就是高可用.Riak支持多节点构建的系统,每次读写请求不需 ...

  3. DB监控-Riak集群监控

    公司的Riak版本是2.0.4,目前已根据CMDB三级业务部署了十几套集群,大部分是跨机房部署.监控采集分为两个大的维度,第一个维度是单机,也就是 「IP:端口」:第二个维度是集群,也就是所有节点指标 ...

  4. Centos6.5里安装Erlang 并安装riak

    一.Erlang安装: 1 首先进入www.erlang.org 下载页面,下载otp_src_17.5.tar.gz. IT网,http://www.it.net.cn 2 解压缩:tar -xzv ...

  5. 分布式系统中一些主要的副本更新策略——Dynamo/Cassandra/Riak同时采取了主从式更新的同步+异步类型,以及任意节点更新的策略。

    分布式系统中一些主要的副本更新策略. 1.同时更新 类型A:没有任何协议,可能出现多个节点执行顺序交叉导致数据不一致情况. 类型B:通过一致性协议唯一确定不同更新操作的执行顺序,从而保证数据一致性 2 ...

  6. HBase Cassandra Riak HyperTable

    Cassandra                                                              HBase 一致性 Quorum NRW策略 通过Goss ...

  7. IOT数据库选型——NOSQL,MemSQL,cassandra,Riak或者OpenTSDB,InfluxDB

    IoT databases should be as flexible as required by the application. NoSQLdatabases -- especially key ...

  8. 对比Cassandra、 Mongodb、CouchDB、Redis、Riak、 Membase、Neo4j、HBase

    转自:http://www.cnblogs.com/alephsoul-alephsoul/archive/2013/04/26/3044630.html 导读:Kristóf Kovács 是一位软 ...

  9. Riak

    出处:http://www.oschina.net/p/riak Riak是以 Erlang 编写的一个高度可扩展的分布式数据存储,Riak的实现是基于Amazon的Dynamo论文,Riak的设计目 ...

随机推荐

  1. boost 定时器.

    #include <iostream> #include <boost/asio.hpp> int main() { boost::asio::io_service io; b ...

  2. 执行update操作的话,就会报“Connection is read-only. Queries leading to data modification are not allowed”的异常。

    我用的是 spring + springmvc + mybatis +mysql. <tx:advice id="txAdvice" transaction-manager= ...

  3. java中的二进制

    (1)按位与运算 & 1 & 1 = 1, 0 & 1 = 0 51 & 5  即 0011  0011 & 0000  0101 =0000 0001 = 1 ...

  4. EffectiveC#01--避免返回内部类对象的引用

    此篇是对00中第3点的再一次阐述. 1.如果一个属性返回一个引用类型,那么调用者就可以访问这个对象的公共成员,也包括修改这些属性的状态. public class MyBusinessObject { ...

  5. WCF之旅

    转载:创建一个简单的WCF程序 http://www.cnblogs.com/artech/archive/2007/02/26/656901.html  Endpoint Overview http ...

  6. 如何在asp.net中如何在线播放各类视频文件

    一.后台拼字符串动态加载写法 前台调用代码 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" &q ...

  7. Django 实战 之 搭项目(正在更新)

    系统:win10 python版本:python 3.5 工具: pyCharm 3.4 professional 源码来源:https://github.com/ouzhigang/django-o ...

  8. CTL_CODE 宏 详解

    CTL_CODE宏 CTL_CODE:用于创建一个唯一的32位系统I/O控制代码,这个控制代码包括4部分组成: DeviceType(设备类型,高16位(16-31位)), Function(功能2- ...

  9. zoj Grouping(强连通+缩点+关键路径)

    题意: 给你N个人,M条年龄大小的关系,现在打算把这些人分成不同的集合,使得每个集合的任意两个人之间的年龄是不可比的.问你最小的集合数是多少? 分析: 首先,假设有一个环,那么这个环中的任意两个点之间 ...

  10. Lucene学习总结之七:Lucene搜索过程解析

    一.Lucene搜索过程总论 搜索的过程总的来说就是将词典及倒排表信息从索引中读出来,根据用户输入的查询语句合并倒排表,得到结果文档集并对文档进行打分的过程. 其可用如下图示: 总共包括以下几个过程: ...