forked from Imagelibrary/littlefs
Adopted full le16 tags instead of 14-bit leb128 tags
The main motivation for this was issues fitting a good tag encoding into
14-bits. The extra 2-bits (though really only 1 bit was needed) from
making this not a leb encoding opens up the space from 3 suptypes to
15 suptypes, which is nothing to shake a stick at.
The main downsides:
1. We can't rely on leb encoding for effectively-infinite extensions.
2. We can't shorten small tags (crcs, grows, shrinks) to one byte.
For 1., extending the leb encoding beyond 14-bits is already
unpalatable, because it would increase RAM costs in the tag
encoder/decoder, which must assume a worst-case tag size, and would likely
add storage cost to every alt pointer, more on this in the next section.
The current encoding is quite generous, so I think it is unlikely we
will exceed the 16-bit encoding space. But even if we do, it's possible
to use a spare bit for an "extended" set of tags in the future.
As for 2., the lack of compression is a downside, but I've realized the
only tags that really matter storage-wise are the alt pointers. In any
rbyd there will be roughly O(m log m) alt pointers, but at most O(m) of
any other tags. What this means is that the encoding of any other tag is
in the noise of the encoding of our alt pointers.
Our alt pointers are already pretty densely packed. But because the
sparse key part of alt-pointers is stored as-is, the worst-case
encoding of in-tree tags likely ends up as the encoding of our
alt-pointers. So going up to 3-byte tags adds a surprisingly large
storage cost.
As a minor plus, le16s should be slightly cheaper to encode/decode. It
should also be slightly easier to debug tags on-disk.
tag encoding:
  TTTTtttt ttttTTTv
     ^--------^--^^- 4+3-bit suptype
              '---|- 8-bit subtype
                  '- valid bit
  iiii iiiiiii iiiiiii iiiiiii iiiiiii
                                     ^- m-bit id/weight
  llll lllllll lllllll lllllll lllllll
                                     ^- m-bit length/jump
Also renamed the "mk" tags, since they no longer have special behavior
outside of providing names for entries:
- LFSR_TAG_MK => LFSR_TAG_NAME
- LFSR_TAG_MKBRANCH => LFSR_TAG_BNAME
- LFSR_TAG_MKREG => LFSR_TAG_REG
- LFSR_TAG_MKDIR => LFSR_TAG_DIR
This commit is contained in:
@@ -7,6 +7,22 @@ import os
|
||||
import struct
|
||||
|
||||
|
||||
TAG_NAME = 0x1000
|
||||
TAG_BNAME = 0x1000
|
||||
TAG_REG = 0x1010
|
||||
TAG_DIR = 0x1020
|
||||
TAG_STRUCT = 0x3000
|
||||
TAG_INLINED = 0x3000
|
||||
TAG_BLOCK = 0x3100
|
||||
TAG_BRANCH = 0x3200
|
||||
TAG_BTREE = 0x3300
|
||||
TAG_UATTR = 0x4000
|
||||
TAG_GROW = 0x0006
|
||||
TAG_SHRINK = 0x0016
|
||||
TAG_ALT = 0x0008
|
||||
TAG_CRC = 0x0004
|
||||
TAG_FCRC = 0x1004
|
||||
|
||||
def blocklim(s):
|
||||
if '.' in s:
|
||||
s = s.strip()
|
||||
@@ -34,6 +50,11 @@ def crc32c(data, crc=0):
|
||||
crc = (crc >> 1) ^ ((crc & 1) * 0x82f63b78)
|
||||
return 0xffffffff ^ crc
|
||||
|
||||
def fromle16(data):
|
||||
if len(data) < 2:
|
||||
return 0
|
||||
return struct.unpack('<H', data[:2])[0]
|
||||
|
||||
def fromleb128(data):
|
||||
word = 0
|
||||
for i, b in enumerate(data):
|
||||
@@ -44,10 +65,10 @@ def fromleb128(data):
|
||||
return word, len(data)
|
||||
|
||||
def fromtag(data):
|
||||
tag, delta = fromleb128(data)
|
||||
id, delta_ = fromleb128(data[delta:])
|
||||
size, delta__ = fromleb128(data[delta+delta_:])
|
||||
return tag&1, tag&~1, id if tag&0x8 else id-1, size, delta+delta_+delta__
|
||||
tag = fromle16(data)
|
||||
id, delta = fromleb128(data[2:])
|
||||
size, delta_ = fromleb128(data[2+delta:])
|
||||
return tag&1, tag&~1, id if tag&0x8 else id-1, size, 2+delta+delta_
|
||||
|
||||
def popc(x):
|
||||
return bin(x).count('1')
|
||||
@@ -63,42 +84,45 @@ def xxd(data, width=16, crc=False):
|
||||
for b in map(chr, data[i:i+width])))
|
||||
|
||||
def tagrepr(tag, id, size, off=None):
|
||||
if (tag & ~0x3f0) == 0x0400:
|
||||
return 'mk%s id%d %d' % (
|
||||
'branch' if ((tag & 0x3f0) >> 4) == 0x00
|
||||
else 'reg' if ((tag & 0x3f0) >> 4) == 0x01
|
||||
else 'dir' if ((tag & 0x3f0) >> 4) == 0x02
|
||||
else ' 0x%02x' % ((tag & 0x3f0) >> 4),
|
||||
if (tag & 0xf00c) == TAG_NAME:
|
||||
return '%s%s id%d %d' % (
|
||||
'rm' if tag & 0x2 else '',
|
||||
'bname' if (tag & 0xfffe) == TAG_BNAME
|
||||
else 'reg' if (tag & 0xfffe) == TAG_REG
|
||||
else 'dir' if (tag & 0xfffe) == TAG_DIR
|
||||
else 'name 0x%02x' % ((tag & 0x0ff0) >> 4),
|
||||
id,
|
||||
size)
|
||||
elif tag == 0x0800:
|
||||
return 'inlined id%d %d' % (id, size)
|
||||
elif tag == 0x0810:
|
||||
return 'block id%d %d' % (id, size)
|
||||
elif tag == 0x0820:
|
||||
return 'btree id%d %d' % (id, size)
|
||||
elif tag == 0x0830:
|
||||
return 'branch id%d %d' % (id, size)
|
||||
elif (tag & ~0xff2) == 0x2000:
|
||||
elif (tag & 0xf00c) == TAG_STRUCT:
|
||||
return '%s%s id%d %d' % (
|
||||
'rm' if tag & 0x2 else '',
|
||||
'inlined' if (tag & 0xfffe) == TAG_INLINED
|
||||
else 'block' if (tag & 0xfffe) == TAG_BLOCK
|
||||
else 'branch' if (tag & 0xfffe) == TAG_BRANCH
|
||||
else 'btree' if (tag & 0xfffe) == TAG_BTREE
|
||||
else 'struct 0x%02x' % ((tag & 0x0ff0) >> 4),
|
||||
id,
|
||||
size)
|
||||
elif (tag & 0xf00c) == TAG_UATTR:
|
||||
return '%suattr 0x%02x%s%s' % (
|
||||
'rm' if tag & 0x2 else '',
|
||||
(tag & 0xff0) >> 4,
|
||||
(tag & 0x0ff0) >> 4,
|
||||
' id%d' % id if id != -1 else '',
|
||||
' %d' % size if not tag & 0x2 or size else '')
|
||||
elif tag == 0x0006:
|
||||
elif (tag & 0xfffe) == TAG_GROW:
|
||||
return 'grow id%d w%d' % (
|
||||
id,
|
||||
size)
|
||||
elif tag == 0x0016:
|
||||
elif (tag & 0xfffe) == TAG_SHRINK:
|
||||
return 'shrink id%d w%d' % (
|
||||
id,
|
||||
size)
|
||||
elif (tag & ~0x10) == 0x0004:
|
||||
elif (tag & 0xf00e) == TAG_CRC:
|
||||
return 'crc%x%s %d' % (
|
||||
1 if tag & 0x10 else 0,
|
||||
' 0x%02x' % id if id != -1 else '',
|
||||
size)
|
||||
elif tag == 0x0024:
|
||||
elif (tag & 0xfffe) == TAG_FCRC:
|
||||
return 'fcrc%s %d' % (
|
||||
' 0x%02x' % id if id != -1 else '',
|
||||
size)
|
||||
@@ -106,7 +130,7 @@ def tagrepr(tag, id, size, off=None):
|
||||
return 'alt%s%s 0x%x w%d %s' % (
|
||||
'r' if tag & 0x2 else 'b',
|
||||
'gt' if tag & 0x4 else 'le',
|
||||
tag & 0x3ff0,
|
||||
tag & 0xfff0,
|
||||
id,
|
||||
'0x%x' % (0xffffffff & (off-size))
|
||||
if off is not None
|
||||
@@ -114,7 +138,6 @@ def tagrepr(tag, id, size, off=None):
|
||||
else:
|
||||
return '0x%04x id%d %d' % (tag, id, size)
|
||||
|
||||
|
||||
class Rbyd:
|
||||
def __init__(self, block, limit, data, rev, off, trunk, weight):
|
||||
self.block = block
|
||||
@@ -154,14 +177,14 @@ class Rbyd:
|
||||
wastrunk = not not tag & 0x8
|
||||
|
||||
# keep track of weight
|
||||
if tag == 0x0006:
|
||||
if tag == TAG_GROW:
|
||||
weight_ += size
|
||||
elif tag == 0x0016:
|
||||
elif tag == TAG_SHRINK:
|
||||
weight_ = max(weight_ - size, 0)
|
||||
|
||||
# take care of crcs
|
||||
if (tag & 0xe) <= 0x4:
|
||||
if (tag & ~0x10) != 0x04:
|
||||
if (tag & 0xf00f) != TAG_CRC:
|
||||
crc = crc32c(data[j_:j_+size], crc)
|
||||
# found a crc?
|
||||
else:
|
||||
@@ -284,24 +307,34 @@ def main(disk, block_size=None, trunk=0, limit=None, *,
|
||||
while True:
|
||||
# first lookup id/name
|
||||
(done, name_tag, rid_, w,
|
||||
name_j, name_d, name) = rbyd.lookup(0x400, rid)
|
||||
name_j, name_d, name) = rbyd.lookup(TAG_NAME, rid)
|
||||
if done:
|
||||
return True, id, 0, rbyd, -1, 0, 0, 0, b'', 0, 0, b'', path
|
||||
if name_tag & 0xf00 != 0x400:
|
||||
name_j, name_d, name = name_j, 0, b''
|
||||
return (True, id, 0, rbyd, -1, 0,
|
||||
0, 0, b'',
|
||||
0, 0, b'',
|
||||
path)
|
||||
|
||||
# then lookup struct
|
||||
(done, tag, _, _,
|
||||
struct_j, struct_d, struct_) = rbyd.lookup(0x800, rid_)
|
||||
if done:
|
||||
return True, id, 0, rbyd, -1, 0, 0, 0, b'', 0, 0, b'', path
|
||||
if name_tag & 0xf00f == TAG_NAME:
|
||||
# then lookup struct
|
||||
(done, tag, _, _,
|
||||
struct_j, struct_d, struct_) = rbyd.lookup(
|
||||
TAG_STRUCT, rid_)
|
||||
if done:
|
||||
return (True, id, 0, rbyd, -1, 0,
|
||||
0, 0, b'',
|
||||
0, 0, b'',
|
||||
path)
|
||||
else:
|
||||
tag = name_tag
|
||||
struct_j, struct_d, struct_ = name_j, name_d, name
|
||||
name_j, name_d, name = name_j, 0, b''
|
||||
|
||||
path.append((id + (rid_-rid), w, rbyd, rid_, tag,
|
||||
name_j, name_d, name,
|
||||
struct_j, struct_d, struct_))
|
||||
|
||||
# is it another branch? continue down tree
|
||||
if tag == 0x830 and (depth is None or depth_ < depth):
|
||||
if tag == TAG_BRANCH and (depth is None or depth_ < depth):
|
||||
block, delta = fromleb128(struct_)
|
||||
limit, _ = fromleb128(struct_[delta:])
|
||||
rbyd = Rbyd.fetch(f, block_size, block, limit)
|
||||
|
||||
@@ -16,6 +16,22 @@ COLORS = [
|
||||
]
|
||||
|
||||
|
||||
TAG_NAME = 0x1000
|
||||
TAG_BNAME = 0x1000
|
||||
TAG_REG = 0x1010
|
||||
TAG_DIR = 0x1020
|
||||
TAG_STRUCT = 0x3000
|
||||
TAG_INLINED = 0x3000
|
||||
TAG_BLOCK = 0x3100
|
||||
TAG_BRANCH = 0x3200
|
||||
TAG_BTREE = 0x3300
|
||||
TAG_UATTR = 0x4000
|
||||
TAG_GROW = 0x0006
|
||||
TAG_SHRINK = 0x0016
|
||||
TAG_ALT = 0x0008
|
||||
TAG_CRC = 0x0004
|
||||
TAG_FCRC = 0x1004
|
||||
|
||||
def blocklim(s):
|
||||
if '.' in s:
|
||||
s = s.strip()
|
||||
@@ -43,6 +59,11 @@ def crc32c(data, crc=0):
|
||||
crc = (crc >> 1) ^ ((crc & 1) * 0x82f63b78)
|
||||
return 0xffffffff ^ crc
|
||||
|
||||
def fromle16(data):
|
||||
if len(data) < 2:
|
||||
return 0
|
||||
return struct.unpack('<H', data[:2])[0]
|
||||
|
||||
def fromleb128(data):
|
||||
word = 0
|
||||
for i, b in enumerate(data):
|
||||
@@ -53,10 +74,10 @@ def fromleb128(data):
|
||||
return word, len(data)
|
||||
|
||||
def fromtag(data):
|
||||
tag, delta = fromleb128(data)
|
||||
id, delta_ = fromleb128(data[delta:])
|
||||
size, delta__ = fromleb128(data[delta+delta_:])
|
||||
return tag&1, tag&~1, id if tag&0x8 else id-1, size, delta+delta_+delta__
|
||||
tag = fromle16(data)
|
||||
id, delta = fromleb128(data[2:])
|
||||
size, delta_ = fromleb128(data[2+delta:])
|
||||
return tag&1, tag&~1, id if tag&0x8 else id-1, size, 2+delta+delta_
|
||||
|
||||
def popc(x):
|
||||
return bin(x).count('1')
|
||||
@@ -72,43 +93,45 @@ def xxd(data, width=16, crc=False):
|
||||
for b in map(chr, data[i:i+width])))
|
||||
|
||||
def tagrepr(tag, id, size, off=None):
|
||||
if (tag & ~0x3f0) == 0x0400:
|
||||
return '%smk%s id%d %d' % (
|
||||
if (tag & 0xf00c) == TAG_NAME:
|
||||
return '%s%s id%d %d' % (
|
||||
'rm' if tag & 0x2 else '',
|
||||
'branch' if ((tag & 0x3f0) >> 4) == 0x00
|
||||
else 'reg' if ((tag & 0x3f0) >> 4) == 0x01
|
||||
else 'dir' if ((tag & 0x3f0) >> 4) == 0x02
|
||||
else ' 0x%02x' % ((tag & 0x3f0) >> 4),
|
||||
'bname' if (tag & 0xfffe) == TAG_BNAME
|
||||
else 'reg' if (tag & 0xfffe) == TAG_REG
|
||||
else 'dir' if (tag & 0xfffe) == TAG_DIR
|
||||
else 'name 0x%02x' % ((tag & 0x0ff0) >> 4),
|
||||
id,
|
||||
size)
|
||||
elif tag == 0x0800:
|
||||
return 'inlined id%d %d' % (id, size)
|
||||
elif tag == 0x0810:
|
||||
return 'block id%d %d' % (id, size)
|
||||
elif tag == 0x0820:
|
||||
return 'btree id%d %d' % (id, size)
|
||||
elif tag == 0x0830:
|
||||
return 'branch id%d %d' % (id, size)
|
||||
elif (tag & ~0xff2) == 0x2000:
|
||||
elif (tag & 0xf00c) == TAG_STRUCT:
|
||||
return '%s%s id%d %d' % (
|
||||
'rm' if tag & 0x2 else '',
|
||||
'inlined' if (tag & 0xfffe) == TAG_INLINED
|
||||
else 'block' if (tag & 0xfffe) == TAG_BLOCK
|
||||
else 'branch' if (tag & 0xfffe) == TAG_BRANCH
|
||||
else 'btree' if (tag & 0xfffe) == TAG_BTREE
|
||||
else 'struct 0x%02x' % ((tag & 0x0ff0) >> 4),
|
||||
id,
|
||||
size)
|
||||
elif (tag & 0xf00c) == TAG_UATTR:
|
||||
return '%suattr 0x%02x%s%s' % (
|
||||
'rm' if tag & 0x2 else '',
|
||||
(tag & 0xff0) >> 4,
|
||||
(tag & 0x0ff0) >> 4,
|
||||
' id%d' % id if id != -1 else '',
|
||||
' %d' % size if not tag & 0x2 or size else '')
|
||||
elif tag == 0x0006:
|
||||
elif (tag & 0xfffe) == TAG_GROW:
|
||||
return 'grow id%d w%d' % (
|
||||
id,
|
||||
size)
|
||||
elif tag == 0x0016:
|
||||
elif (tag & 0xfffe) == TAG_SHRINK:
|
||||
return 'shrink id%d w%d' % (
|
||||
id,
|
||||
size)
|
||||
elif (tag & ~0x10) == 0x0004:
|
||||
elif (tag & 0xf00e) == TAG_CRC:
|
||||
return 'crc%x%s %d' % (
|
||||
1 if tag & 0x10 else 0,
|
||||
' 0x%02x' % id if id != -1 else '',
|
||||
size)
|
||||
elif tag == 0x0024:
|
||||
elif (tag & 0xfffe) == TAG_FCRC:
|
||||
return 'fcrc%s %d' % (
|
||||
' 0x%02x' % id if id != -1 else '',
|
||||
size)
|
||||
@@ -116,7 +139,7 @@ def tagrepr(tag, id, size, off=None):
|
||||
return 'alt%s%s 0x%x w%d %s' % (
|
||||
'r' if tag & 0x2 else 'b',
|
||||
'gt' if tag & 0x4 else 'le',
|
||||
tag & 0x3ff0,
|
||||
tag & 0xfff0,
|
||||
id,
|
||||
'0x%x' % (0xffffffff & (off-size))
|
||||
if off is not None
|
||||
@@ -235,7 +258,7 @@ def show_log(block_size, data, rev, off, *,
|
||||
j_ += size
|
||||
|
||||
# note we ignore out-of-bounds here for debugging
|
||||
if tag == 0x0006:
|
||||
if tag == TAG_GROW:
|
||||
# grow lifetimes
|
||||
i, id_ = index(weights, id)
|
||||
if id_ > 0:
|
||||
@@ -247,7 +270,7 @@ def show_log(block_size, data, rev, off, *,
|
||||
|
||||
checkpoint(j, weights, lifetimes, {i}, set(), {i})
|
||||
|
||||
elif tag == 0x0016:
|
||||
elif tag == TAG_SHRINK:
|
||||
# shrink lifetimes
|
||||
i, id_ = index(weights, id)
|
||||
size_ = size
|
||||
@@ -354,7 +377,7 @@ def show_log(block_size, data, rev, off, *,
|
||||
j_ += delta
|
||||
|
||||
if (tag & 0xe) <= 0x4:
|
||||
if (tag & ~0x10) != 0x04:
|
||||
if (tag & 0xf00f) != TAG_CRC:
|
||||
crc = crc32c(data[j_:j_+size], crc)
|
||||
# found a crc?
|
||||
else:
|
||||
@@ -707,14 +730,14 @@ def main(disk, block_size=None, block1=0, block2=None, *,
|
||||
wastrunk = not not tag & 0x8
|
||||
|
||||
# keep track of weight
|
||||
if tag == 0x0006:
|
||||
if tag == TAG_GROW:
|
||||
weight_ += size
|
||||
elif tag == 0x0016:
|
||||
elif tag == TAG_SHRINK:
|
||||
weight_ = max(weight_ - size, 0)
|
||||
|
||||
# take care of crcs
|
||||
if (tag & 0xe) <= 0x4:
|
||||
if (tag & ~0x10) != 0x04:
|
||||
if (tag & 0xf00f) != TAG_CRC:
|
||||
crc = crc32c(data[j_:j_+size], crc)
|
||||
# found a crc?
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user