... consistent hashing breaks cluster-wide sort order ...
Basic Brick API: Basic UBF Data Types
table() = atom(nonempty,nonundefined);
key() = binary();
%% ts = timestamp = usually an int64: time_t * 1000000 + usecs
ts() = integer();
val() = binary();
exp_time() = time_t();
flags_list() = [do_op_flag()];
do_op_flag() = {testset, ts()} |
witness |
get_all_attribs |
%% Flags for get_many
{max_num, integer()} |
{binary_prefix, binary()} |
must_exist |
must_not_exist |
value_in_ram |
....
Basic Brick API: modify ops
add() = {add, key(), ts(), val(), exp_time(), flags_list()};
replace() = {replace, key(), ts(), val(), exp_time(), flags_list()};
set() = {set, key(), ts(), val(), exp_time(), flags_list()};
add_res() = do1_res_ok() | do1_res_fail();
do1_res_ok() = ok |
key_not_exist |
{ok, ts()} |
{ok, ts(), val()} |
....
do1_res_fail() = {key_exists, ts()} |
key_not_exist |
{ts_error, ts()} |
invalid_flag_present |
%% The responsible brick is unavailable/crashed/whatever.
brick_not_available;
Basic Brick API: read-only ops
get() = {get, key(), flags_list()};
get_many() = {get_many, key(), flags_list()};
get_res() = key_not_exist |
{ok, ts()} |
{ok, ts(), val()} |
{ok, ts(), val(), time_t(), flags_list()} |
....
get_many_res() = {ok, {[{key(), ts()}], boolean()}} |
{ok, {[{key(), ts(), flags_list()}], boolean()}} |
{ok, {[{key(), ts(), val(), time_t(), flags_list()}], boolean()}} |
....
Micro-Transactions
- Original chain replication paper: head of chain can make non-deterministic decisions
Valid micro-transaction: all keys managed by same chain
[txn,
{op = replace, key = "string1", value = "Hello, world!"},
{op = delete, key = "string4"}
]
Invalid micro-transaction: keys managed by different chains
[txn,
{op = replace, key = "string1", value = "Hello, world!"},
{op = delete, key = "string2"}
]
Actually Useful Micro-transactions
- {TableName, KeyPrefix} -> Chain
- Key prefix is sort-of like a Riak "bucket"
- Key prefix method configurable per table
- Everything: <<"whole-key-whatever">>
- Fixed length prefix: <<Prefix:4/binary, "whatever">>
- Variable length prefix (e.g. to Nth slash): <<"@slfritchie/whatever">>
Example Micro-Transaction
Imagine a table called 'posts'
Sample key  | Data stored in value blob
------------+------------------------------------------------------------
/42/1       | Text of post #1
/42/1/1     | Text of comment #1 on post #1
/42/1/2     | Text of comment #2 on post #1
/42/2       | Text of post #2
/42/summary | Next post number, number of active posts, number of deleted posts, . . .
%% @doc Store a new post for user `UserID' in the 'posts' table.
%%
%% Reads the user's summary value (key "/<UserID>/summary"), takes the
%% next post number from it, and submits a single transaction that
%%   1. replaces the summary with `next_id' and `active' each
%%      incremented by one, and
%%   2. adds `PostText' under the key "/<UserID>/<next post number>".
%%
%% Crashes with `badmatch' if the summary read does not return
%% `{ok, TS, Val}' or if the transaction result is not `[ok, ok]'.
add_new_post(UserID, PostText) ->
    UserPrefix = "/" ++ integer_to_list(UserID) ++ "/",
    SummaryKey = UserPrefix ++ "summary",

    %% Fetch the current summary together with its timestamp; the
    %% timestamp is reused below as the testset condition.
    {ok, SummaryTS, SummaryBin} = brick_simple:get(posts, SummaryKey),
    Summary = binary_to_term(SummaryBin),
    #post{next_id = PostNum, active = ActiveCount} = Summary,

    NewPostKey = UserPrefix ++ integer_to_list(PostNum),
    UpdatedSummary = Summary#post{next_id = PostNum + 1,
                                  active = ActiveCount + 1},

    TxnMarker = brick_server:make_txn(),
    %% replace op: Abort if the key does not exist
    %%             or if current timestamp /= SummaryTS.
    %% NOTE(review): the literal 0 is presumably the expiry time -- confirm
    %% against brick_server:make_replace/4.
    ReplaceSummary = brick_server:make_replace(SummaryKey,
                                               term_to_binary(UpdatedSummary),
                                               0,
                                               [{testset, SummaryTS}]),
    %% add op: Abort if the key already exists.
    AddPost = brick_server:make_add(NewPostKey, PostText),

    [ok, ok] = brick_simple:do(posts, [TxnMarker, ReplaceSummary, AddPost]).
The Admin Server