Dropped separate BTREE/BRANCH encodings

There is a bit of redundancy here, as we already know the weights of
btree's inner-branches from their parents. But in theory sharing the
same encoding for both the top level btree reference and inner-branches
should offer more chance for deduplication and hopefully less code.

This also moves some members around in the btree encoding so that the
redund blocks are at the beginning. This _might_ simplify decoding of
the variable-length redund blocks at some point.

Current btree encoding:

  .----+----+----+----.
  |       blocks    ...  redund leb128s (1-20 bytes)
  :                   :
  |----+----+----+----|
  |       trunk     ...  1 leb128 (1-5 bytes)
  |----+----+----+----|
  |       weight    ...  1 leb128 (1-5 bytes)
  |----+----+----+----|
  |       cksum       |  1 le32 (4 bytes)
  '----+----+----+----'

This also partially reverts some tag name changes:

- BNAME -> BRANCH
- DMARK -> BOOKMARK
This commit is contained in:
Christopher Haster
2023-08-22 13:20:37 -05:00
parent f70e7fe709
commit 256430213d
6 changed files with 114 additions and 178 deletions

View File

@@ -14,8 +14,8 @@ TAG_SUPERCONFIG = 0x0004
TAG_GSTATE = 0x0100
TAG_GRM = 0x0100
TAG_NAME = 0x0200
TAG_BNAME = 0x0200
TAG_DMARK = 0x0201
TAG_BRANCH = 0x0200
TAG_BOOKMARK = 0x0201
TAG_REG = 0x0202
TAG_DIR = 0x0203
TAG_STRUCT = 0x0300
@@ -25,8 +25,7 @@ TAG_BTREE = 0x030c
TAG_MDIR = 0x0311
TAG_MTREE = 0x0314
TAG_MROOT = 0x0318
TAG_BRANCH = 0x031c
TAG_DID = 0x0320
TAG_DID = 0x031c
TAG_UATTR = 0x0400
TAG_SATTR = 0x0500
TAG_ALT = 0x4000
@@ -106,18 +105,13 @@ def frommdir(data):
d += d_
return blocks
def frombranch(data):
def frombtree(data):
d = 0
block, d_ = fromleb128(data[d:]); d += d_
trunk, d_ = fromleb128(data[d:]); d += d_
cksum = fromle32(data[d:]); d += 4
return block, trunk, cksum
def frombtree(data):
d = 0
w, d_ = fromleb128(data[d:]); d += d_
block, trunk, cksum = frombranch(data[d:])
return w, block, trunk, cksum
cksum = fromle32(data[d:]); d += 4
return block, trunk, w, cksum
def popc(x):
return bin(x).count('1')
@@ -153,8 +147,8 @@ def tagrepr(tag, w, size, off=None):
size)
elif (tag & 0xff00) == TAG_NAME:
return '%s%s %d' % (
'bname' if tag == TAG_BNAME
else 'dmark' if tag == TAG_DMARK
'branch' if tag == TAG_BRANCH
else 'bookmark' if tag == TAG_BOOKMARK
else 'reg' if tag == TAG_REG
else 'dir' if tag == TAG_DIR
else 'name 0x%02x' % (tag & 0xff),
@@ -168,7 +162,6 @@ def tagrepr(tag, w, size, off=None):
else 'mdir' if tag == TAG_MDIR
else 'mtree' if tag == TAG_MTREE
else 'mroot' if tag == TAG_MROOT
else 'branch' if tag == TAG_BRANCH
else 'did' if tag == TAG_DID
else 'struct 0x%02x' % (tag & 0xff),
' w%d' % w if w else '',
@@ -527,7 +520,7 @@ class Rbyd:
rid_, w = rid__, w_
# catch any branches
if tag == TAG_BRANCH:
if tag == TAG_BTREE:
branch = (tag, j, d, data)
tags.append((tag, j, d, data))
@@ -539,7 +532,7 @@ class Rbyd:
if branch is not None and (
not depth or depth_ < depth):
tag, j, d, data = branch
block, trunk, cksum = frombranch(data)
block, trunk, _, cksum = frombtree(data)
rbyd = Rbyd.fetch(f, block_size, block, trunk)
# corrupted? bail here so we can keep traversing the tree
@@ -634,7 +627,7 @@ class Rbyd:
))
d_ += max(bdepths.get(d, 0), 1)
leaf = (bid-(w-1), d, rid-(w-1), TAG_BRANCH)
leaf = (bid-(w-1), d, rid-(w-1), TAG_BTREE)
# remap branches to leaves if we aren't showing inner branches
if not inner:
@@ -813,7 +806,7 @@ def main(disk, mroots=None, *,
if not args.get('depth') or mdepth < args.get('depth'):
done, rid, tag, w, j, d, data, _ = mroot.lookup(-1, TAG_MTREE)
if not done and rid == -1 and tag == TAG_MTREE:
w, block, trunk, cksum = frombtree(data)
block, trunk, w, cksum = frombtree(data)
mtree = Rbyd.fetch(f, block_size, block, trunk)
mweight = w
@@ -1482,7 +1475,7 @@ def main(disk, mroots=None, *,
if not args.get('depth') or mdepth < args.get('depth'):
done, rid, tag, w, j, d, data, _ = mroot.lookup(-1, TAG_MTREE)
if not done and rid == -1 and tag == TAG_MTREE:
w, block, trunk, cksum = frombtree(data)
block, trunk, w, cksum = frombtree(data)
mtree = Rbyd.fetch(f, block_size, block, trunk)
# traverse entries