Files
littlefs/scripts/dbgbtree.py
Christopher Haster 546fff77fb Adopted full le16 tags instead of 14-bit leb128 tags
The main motivation for this was issues fitting a good tag encoding into
14-bits. The extra 2-bits (though really only 1 bit was needed) from
making this not a leb encoding opens up the space from 3 suptypes to
15 suptypes, which is nothing to shake a stick at.

The main downsides:
1. We can't rely on leb encoding for effectively-infinite extensions.
2. We can't shorten small tags (crcs, grows, shrinks) to one byte.

For 1., extending the leb encoding beyond 14-bits is already
unpalatable, because it would increase RAM costs in the tag
encoder/decoder, which must assume a worst-case tag size, and would likely
add storage cost to every alt pointer, more on this in the next section.

The current encoding is quite generous, so I think it is unlikely we
will exceed the 16-bit encoding space. But even if we do, it's possible
to use a spare bit for an "extended" set of tags in the future.

As for 2., the lack of compression is a downside, but I've realized the
only tags that really matter storage-wise are the alt pointers. In any
rbyds there will be roughly O(m log m) alt pointers, but at most O(m) of
any other tags. What this means is that the encoding of any other tag is
in the noise of the encoding of our alt pointers.

Our alt pointers are already pretty densely packed. But because the
sparse key part of alt-pointers is stored as-is, the worst-case
encoding of in-tree tags likely ends up as the encoding of our
alt-pointers. So going up to 3-byte tags adds a surprisingly large
storage cost.

As a minor plus, le16s should be slightly cheaper to encode/decode. It
should also be slightly easier to debug tags on-disk.

  tag encoding:
                     TTTTtttt ttttTTTv
                        ^--------^--^^- 4+3-bit suptype
                                 '---|- 8-bit subtype
                                     '- valid bit
  iiii iiiiiii iiiiiii iiiiiii iiiiiii
                                     ^- m-bit id/weight
  llll lllllll lllllll lllllll lllllll
                                     ^- m-bit length/jump

Also renamed the "mk" tags, since they no longer have special behavior
outside of providing names for entries:
- LFSR_TAG_MK       => LFSR_TAG_NAME
- LFSR_TAG_MKBRANCH => LFSR_TAG_BNAME
- LFSR_TAG_MKREG    => LFSR_TAG_REG
- LFSR_TAG_MKDIR    => LFSR_TAG_DIR
2023-03-25 14:36:29 -05:00

653 lines
23 KiB
Python
Executable File

#!/usr/bin/env python3
import bisect
import itertools as it
import math as m
import os
import struct
# Tag values as compared against the 16-bit tag word with the valid bit
# (bit 0) cleared.

# name tags, TAG_BNAME shares its value with TAG_NAME
TAG_NAME = 0x1000
TAG_BNAME = 0x1000
TAG_REG = 0x1010
TAG_DIR = 0x1020
# struct tags
TAG_STRUCT = 0x3000
TAG_INLINED = 0x3000
TAG_BLOCK = 0x3100
TAG_BRANCH = 0x3200
TAG_BTREE = 0x3300
# user attributes
TAG_UATTR = 0x4000
# weight changes, the tag's size field is added to/subtracted from the
# rbyd's weight when fetching
TAG_GROW = 0x0006
TAG_SHRINK = 0x0016
# alt pointers, any tag with bit 0x8 set is treated as an alt
TAG_ALT = 0x0008
# checksums
TAG_CRC = 0x0004
TAG_FCRC = 0x1004
def blocklim(s):
    """Parse a block address with an optional '.limit' suffix.

    Without a '.', the string is parsed as a single integer with base
    auto-detected from its prefix and returned as an int.

    With a '.', an optional leading 0x/0o/0b prefix selects the base for
    both halves (decimal otherwise) and a (block, limit) tuple is returned.
    """
    # plain block address, let int figure out the base
    if '.' not in s:
        return int(s, 0)

    text = s.strip()
    base = 10
    # a single leading prefix applies to both the block and the limit
    for prefixes, radix in (
            (('0x', '0X'), 16),
            (('0o', '0O'), 8),
            (('0b', '0B'), 2)):
        if text.startswith(prefixes):
            text = text[2:]
            base = radix
            break

    block, _, limit = text.partition('.')
    return int(block, base), int(limit, base)
def crc32c(data, crc=0):
    """Compute a bitwise, reflected CRC over data with polynomial 0x82f63b78.

    crc is the result of a previous call, which allows checksumming data
    in pieces. The state is inverted on entry and on exit.
    """
    state = crc ^ 0xffffffff
    for byte in data:
        state ^= byte
        # shift out one bit at a time, folding in the polynomial whenever
        # a set bit falls off the low end
        for _ in range(8):
            carry = state & 1
            state >>= 1
            if carry:
                state ^= 0x82f63b78
    return state ^ 0xffffffff
def fromle16(data):
    """Decode a little-endian 16-bit word from the start of data.

    Returns 0 if fewer than two bytes are available.
    """
    head = data[:2]
    # truncated input decodes as zero
    if len(head) != 2:
        return 0
    return int.from_bytes(head, 'little')
def fromleb128(data):
    """Decode a leb128 integer from the start of data.

    Returns (word, consumed) where word is truncated to 32 bits and
    consumed is the number of bytes read. If data ends before a byte
    without the continuation bit is seen, everything decoded so far is
    returned and consumed equals len(data).
    """
    value = 0
    shift = 0
    consumed = 0
    for byte in data:
        consumed += 1
        # each byte contributes its low 7 bits
        value = (value | ((byte & 0x7f) << shift)) & 0xffffffff
        shift += 7
        # a clear high bit terminates the encoding
        if (byte & 0x80) == 0:
            break
    return value, consumed
def fromtag(data):
    """Decode one tag from the start of data.

    A tag is a le16 tag word followed by two leb128 fields. Returns
    (valid, tag, id, size, delta) where valid is bit 0 of the tag word,
    tag is the tag word with bit 0 cleared, and delta is the total number
    of bytes the encoded tag occupies.

    For tags with bit 0x8 set the first leb128 is returned as-is, for all
    other tags it is returned minus one.
    """
    word = fromle16(data)
    field, field_len = fromleb128(data[2:])
    size, size_len = fromleb128(data[2+field_len:])

    valid = word & 1
    tag = word & ~1
    # tags without bit 0x8 store their id biased by one
    if not word & 0x8:
        field -= 1

    return valid, tag, field, size, 2+field_len+size_len
def popc(x):
    """Return the number of 1 digits in the binary representation of x."""
    return format(x, 'b').count('1')
def xxd(data, width=16, crc=False):
    """Yield hexdump lines for data, width bytes per line.

    Each line is the space-separated hex bytes padded to 3*width columns,
    a space, then the bytes as text padded to width columns, with anything
    outside ' '..'~' shown as '.'. The crc argument is accepted but not
    used.
    """
    for start in range(0, len(data), width):
        row = data[start:start+width]
        hexed = ' '.join('%02x' % byte for byte in row)
        text = ''.join(
            chr(byte) if 0x20 <= byte <= 0x7e else '.'
            for byte in row)
        yield '%-*s %-*s' % (3*width, hexed, width, text)
def tagrepr(tag, id, size, off=None):
    """Return a short human-readable description of a tag.

    tag is the 16-bit tag word, id and size are the tag's two leb128
    fields as returned by fromtag. For alt pointers (bit 0x8 set) id is
    printed as the alt's weight and size is the jump; if off, the offset
    of the alt, is given the jump is shown as an absolute offset,
    otherwise it is shown as a relative '-jump'.

    Tags that match none of the known types are shown as raw hex.
    """
    if (tag & 0xf00c) == TAG_NAME:
        return '%s%s id%d %d' % (
            'rm' if tag & 0x2 else '',
            'bname' if (tag & 0xfffe) == TAG_BNAME
                else 'reg' if (tag & 0xfffe) == TAG_REG
                else 'dir' if (tag & 0xfffe) == TAG_DIR
                else 'name 0x%02x' % ((tag & 0x0ff0) >> 4),
            id,
            size)
    elif (tag & 0xf00c) == TAG_STRUCT:
        return '%s%s id%d %d' % (
            'rm' if tag & 0x2 else '',
            'inlined' if (tag & 0xfffe) == TAG_INLINED
                else 'block' if (tag & 0xfffe) == TAG_BLOCK
                else 'branch' if (tag & 0xfffe) == TAG_BRANCH
                else 'btree' if (tag & 0xfffe) == TAG_BTREE
                else 'struct 0x%02x' % ((tag & 0x0ff0) >> 4),
            id,
            size)
    elif (tag & 0xf00c) == TAG_UATTR:
        return '%suattr 0x%02x%s%s' % (
            'rm' if tag & 0x2 else '',
            (tag & 0x0ff0) >> 4,
            ' id%d' % id if id != -1 else '',
            # only hide the size of an rm with no data
            ' %d' % size if not tag & 0x2 or size else '')
    elif (tag & 0xfffe) == TAG_GROW:
        return 'grow id%d w%d' % (
            id,
            size)
    elif (tag & 0xfffe) == TAG_SHRINK:
        return 'shrink id%d w%d' % (
            id,
            size)
    elif (tag & 0xf00e) == TAG_CRC:
        return 'crc%x%s %d' % (
            1 if tag & 0x10 else 0,
            ' 0x%02x' % id if id != -1 else '',
            size)
    elif (tag & 0xfffe) == TAG_FCRC:
        return 'fcrc%s %d' % (
            ' 0x%02x' % id if id != -1 else '',
            size)
    elif tag & 0x8:
        return 'alt%s%s 0x%x w%d %s' % (
            'r' if tag & 0x2 else 'b',
            'gt' if tag & 0x4 else 'le',
            tag & 0xfff0,
            id,
            # without a known offset we can only show the relative jump,
            # note this must format size, off is None in this branch
            '0x%x' % (0xffffffff & (off-size))
                if off is not None
                else '-%d' % size)
    else:
        return '0x%04x id%d %d' % (tag, id, size)
class Rbyd:
    """An rbyd as read from a single block of the disk image.

    Instances are normally created with Rbyd.fetch. An Rbyd is falsy if no
    trunk was found, which is how callers detect a corrupted/empty rbyd.
    """
    def __init__(self, block, limit, data, rev, off, trunk, weight):
        # block index the rbyd was read from
        self.block = block
        # number of bytes requested from the block
        self.limit = limit
        # raw bytes read from the block
        self.data = data
        # le32 found in the first 4 bytes of the block
        self.rev = rev
        # offset just past the last commit with a matching crc, 0 if none
        self.off = off
        # offset of the trunk as of the last good commit, None if none
        self.trunk = trunk
        # weight as of the last good commit
        self.weight = weight

    @classmethod
    def fetch(cls, f, block_size, block, limit):
        """Read and validate an rbyd from an open file.

        f is a seekable binary file, the rbyd is read from byte offset
        block*block_size and at most limit bytes are considered.

        Tags are scanned in order while maintaining a running crc. Scanning
        stops at the first tag whose valid bit does not match the parity of
        the running crc, or at the first crc tag whose stored checksum does
        not match. off/trunk/weight are only updated when a crc tag checks
        out, so the returned Rbyd reflects the last good commit.
        """
        # seek to the block
        f.seek(block * block_size)
        data = f.read(limit)

        # fetch the rbyd
        rev, = struct.unpack('<I', data[0:4].ljust(4, b'\0'))
        # the revision count is included in the checksum
        crc = crc32c(data[0:4])
        off = 0
        j_ = 4
        # underscored variables track in-progress state, the plain
        # variables are only updated on a good crc
        trunk = None
        trunk_ = None
        weight = 0
        weight_ = 0
        wastrunk = False
        while j_ < limit:
            v, tag, id, size, delta = fromtag(data[j_:])
            # the valid bit must match the parity of the crc so far
            if v != (popc(crc) & 1):
                break
            crc = crc32c(data[j_:j_+delta], crc)
            j_ += delta

            # find trunk
            #
            # a new trunk starts at the first non-crc tag that does not
            # directly follow an alt
            if not wastrunk and (tag & 0xe) != 0x4:
                trunk_ = j_ - delta
            wastrunk = not not tag & 0x8

            # keep track of weight
            if tag == TAG_GROW:
                weight_ += size
            elif tag == TAG_SHRINK:
                weight_ = max(weight_ - size, 0)

            # take care of crcs
            #
            # only these tags are followed by size bytes of data, for
            # grows/shrinks/alts the size field is not a data length and
            # j_ is not advanced past it
            if (tag & 0xe) <= 0x4:
                if (tag & 0xf00f) != TAG_CRC:
                    crc = crc32c(data[j_:j_+size], crc)
                # found a crc?
                else:
                    crc_, = struct.unpack('<I', data[j_:j_+4].ljust(4, b'\0'))
                    if crc != crc_:
                        break
                    # commit what we have
                    off = j_ + size
                    trunk = trunk_
                    weight = weight_
                j_ += size

        return Rbyd(block, limit, data, rev, off, trunk, weight)

    def lookup(self, tag, id):
        """Find the tag reached by searching for (id, tag) from the trunk.

        Returns (done, tag, id, w, j, delta, data) where tag/id are the tag
        actually found, w is the weight computed for the found id, j is the
        offset of the found tag, delta is its encoded size, and data is the
        data following it.

        done is truthy if the found (id, tag) sorts before the requested
        (id, tag), or if the found tag has bit 0x2 set (rendered as an 'rm'
        prefix by tagrepr); callers use this to stop iterating. If the rbyd
        has no trunk, done is True and the other fields are placeholders.
        """
        if not self:
            return True, 0, -1, 0, 0, 0, b''

        # exclusive lower bound and exclusive upper bound on the ids
        # covered by the current subtree
        lower = -1
        upper = self.weight

        # descend down tree
        j = self.trunk
        while True:
            # for alts the two leb128 fields are the weight and the jump
            _, alt, weight_, jump, delta = fromtag(self.data[j:])

            # found an alt?
            if alt & 0x8:
                # follow?
                #
                # bit 0x4 selects a greater-than alt, otherwise this is a
                # less-than-or-equal alt, the low 4 bits of the tags are
                # ignored when comparing
                if ((id, tag & ~0xf) > (upper-weight_-1, alt & ~0xf)
                        if alt & 0x4
                        else ((id, tag & ~0xf) <= (lower+weight_, alt & ~0xf))):
                    # narrow the bounds to the alt's side and jump backwards
                    lower += upper-lower-1-weight_ if alt & 0x4 else 0
                    upper -= upper-lower-1-weight_ if not alt & 0x4 else 0
                    j = j - jump
                # stay on path
                else:
                    # exclude the alt's weight and move on to the next tag
                    lower += weight_ if not alt & 0x4 else 0
                    upper -= weight_ if alt & 0x4 else 0
                    j = j + delta
            # found tag
            else:
                tag_ = alt
                # the id/weight are implied by the bounds we ended up with
                id_ = upper-1
                w_ = id_-lower

                done = (id_, tag_) < (id, tag) or tag_ & 2

                return (done, tag_, id_, w_,
                    j, delta, self.data[j+delta:j+delta+jump])

    def __bool__(self):
        """An rbyd is truthy only if a trunk was found."""
        return self.trunk is not None

    def __eq__(self, other):
        """Rbyds compare equal if they have the same block and limit."""
        return self.block == other.block and self.limit == other.limit

    def __ne__(self, other):
        return not self.__eq__(other)

    def __iter__(self):
        """Yield (tag, id, w, j, delta, data) for each tag lookup finds.

        Starts at tag 0, id 0 and keeps looking up the tag after the
        previous result until lookup reports done.
        """
        tag = 0
        id = 0

        while True:
            # tag+0x10 asks for the next tag after the one we just found
            done, tag, id, w, j, d, data = self.lookup(tag+0x10, id)
            if done:
                break

            yield tag, id, w, j, d, data
def main(disk, block_size=None, trunk=0, limit=None, *,
        color='auto',
        **args):
    """Print the B-tree rooted at an rbyd trunk in a disk image.

    disk is the path of the file containing the block device. block_size
    is the size of a block in bytes, if omitted the whole file is treated
    as one block. trunk is the block address of the root rbyd, or a
    (block, limit) tuple as returned by blocklim. limit is the number of
    bytes of the root rbyd to consider, it defaults to the limit in trunk
    if there is one, otherwise to block_size.

    color is one of 'auto', 'always', anything else disables colors.

    The remaining keyword arguments are optional flags:
    - tree: draw the B-tree's branches next to the entries
    - depth: maximum number of rbyds to descend through
    - inner: also show the entries of inner rbyds
    - device: show the device-side representation of tags
    - raw: show the raw bytes of tags and data
    - no_truncate: show all data instead of the first 8 bytes
    - error_on_corrupt: exit with status 2 if a corrupted rbyd was found
    """
    # sys is otherwise only imported under the __main__ guard, import it
    # here so main also works when this file is imported as a module
    import sys

    # figure out what color should be
    if color == 'auto':
        color = sys.stdout.isatty()
    elif color == 'always':
        color = True
    else:
        color = False

    # trunk may include a limit
    if isinstance(trunk, tuple):
        if limit is None:
            limit = trunk[1]
        trunk = trunk[0]

    # we seek around a bunch, so just keep the disk open
    with open(disk, 'rb') as f:
        # if block_size is omitted, assume the block device is one big block
        if block_size is None:
            f.seek(0, os.SEEK_END)
            block_size = f.tell()

        # default limit to the block_size
        if limit is None:
            limit = block_size

        # fetch the trunk
        trunk = Rbyd.fetch(f, block_size, trunk, limit)
        print('btree 0x%x.%x, rev %d, weight %d' % (
            trunk.block, trunk.limit, trunk.rev, trunk.weight))

        # look up an id, while keeping track of the search path
        #
        # returns (done, id, w, rbyd, rid, tag,
        #     name_j, name_d, name,
        #     struct_j, struct_d, struct_,
        #     path)
        # where id is the tree-wide id of the entry found, rid is the id
        # inside rbyd, and path has one entry per rbyd visited
        def lookup(id, depth=None):
            rbyd = trunk
            rid = id
            depth_ = 1
            path = []

            # corrupted? return a corrupted block once
            if not rbyd:
                return (id > 0, id, 0, rbyd, -1, 0,
                    0, 0, b'',
                    0, 0, b'',
                    path)

            while True:
                # first lookup id/name
                (done, name_tag, rid_, w,
                    name_j, name_d, name) = rbyd.lookup(TAG_NAME, rid)
                if done:
                    return (True, id, 0, rbyd, -1, 0,
                        0, 0, b'',
                        0, 0, b'',
                        path)

                if name_tag & 0xf00f == TAG_NAME:
                    # then lookup struct
                    (done, tag, _, _,
                        struct_j, struct_d, struct_) = rbyd.lookup(
                            TAG_STRUCT, rid_)
                    if done:
                        return (True, id, 0, rbyd, -1, 0,
                            0, 0, b'',
                            0, 0, b'',
                            path)
                else:
                    # no name tag, what we found is the struct itself
                    tag = name_tag
                    struct_j, struct_d, struct_ = name_j, name_d, name
                    name_j, name_d, name = name_j, 0, b''

                path.append((id + (rid_-rid), w, rbyd, rid_, tag,
                    name_j, name_d, name,
                    struct_j, struct_d, struct_))

                # is it another branch? continue down tree
                if tag == TAG_BRANCH and (depth is None or depth_ < depth):
                    # a branch's data is a leb128 block followed by a
                    # leb128 limit
                    block, delta = fromleb128(struct_)
                    limit, _ = fromleb128(struct_[delta:])
                    rbyd = Rbyd.fetch(f, block_size, block, limit)

                    # corrupted? bail here so we can keep traversing the tree
                    if not rbyd:
                        return (False, id + (rid_-rid), w, rbyd, -1, 0,
                            0, 0, b'',
                            0, 0, b'',
                            path)

                    # make rid relative to the start of the branch
                    rid -= (rid_-(w-1))
                    depth_ += 1
                else:
                    return (False, id + (rid_-rid), w, rbyd, rid_, tag,
                        name_j, name_d, name,
                        struct_j, struct_d, struct_,
                        path)

        # if we're printing the tree, first find the max depth so we know how
        # much space to reserve
        t_width = 0
        if args.get('tree'):
            t_depth = 0
            id = -1
            while True:
                (done, id, w, rbyd, rid, tag,
                    name_j, name_d, name,
                    struct_j, struct_d, struct_,
                    path) = (lookup(id+1, depth=args.get('depth')))
                if done:
                    break

                t_depth = max(t_depth, len(path))

            t_width = 2*t_depth+2 if t_depth > 0 else 0
            # (first id, last id+1) of each branch on the current path,
            # this is rebuilt for every entry we print
            t_branches = [(0, trunk.weight)]

            # render the tree branches next to the entry with the given
            # id/weight, d is the depth of the entry if it is an inner node
            def t_repr(id, w, d=None):
                branches_ = []
                for i in range(len(t_branches)):
                    if d is not None and d == i-1:
                        branches_.append('+')
                    elif i+1 < len(t_branches):
                        if (id-(w-1) == t_branches[i+1][0]
                                and t_branches[i][0] == t_branches[i+1][0]
                                and (not args.get('inner')
                                    or (i == 0 and d == 0))):
                            branches_.append('+-')
                        elif (id-(w-1) == t_branches[i+1][0]
                                and t_branches[i][1] == t_branches[i+1][1]
                                and (not args.get('inner') or d == i)):
                            branches_.append('\'-')
                        elif (id-(w-1) == t_branches[i+1][0]
                                and (not args.get('inner') or d == i)):
                            branches_.append('|-')
                        elif (id-(w-1) >= t_branches[i][0]
                                and id-(w-1) < t_branches[i][1]
                                and t_branches[i][1] != t_branches[i+1][1]):
                            branches_.append('| ')
                        else:
                            # every level is two columns wide, the filler
                            # needs to match or the arrows end up misaligned
                            branches_.append('  ')
                    else:
                        if (id-(w-1) == t_branches[i][0]
                                and (not args.get('inner') or i == 0)):
                            branches_.append('+-%s> ' % ('-'*2*(t_depth-i-1)))
                        elif id == t_branches[i][1]-1:
                            branches_.append('\'-%s> ' % ('-'*2*(t_depth-i-1)))
                        elif (id >= t_branches[i][0]
                                and id-(w-1) < t_branches[i][1]):
                            branches_.append('|-%s> ' % ('-'*2*(t_depth-i-1)))

                return '%s%-*s%s' % (
                    '\x1b[90m' if color else '',
                    t_width, ''.join(branches_),
                    '\x1b[m' if color else '')

        # print header
        #
        # the block column is 10 wide to line up with the 'xxxx.xxxx:'
        # prefix of the entries below
        w_width = 2*m.ceil(m.log10(max(1, trunk.weight)+1))+1
        print('%-10s %*s%-*s %-8s %-22s %s' % (
            'block',
            t_width, '',
            w_width, 'ids',
            'name',
            'tag',
            'data (truncated)'
                if not args.get('no_truncate') else ''))

        # traverse and print entries
        id = -1
        prbyd = None
        ppath = []
        corrupted = False
        while True:
            (done, id, w, rbyd, rid, tag,
                name_j, name_d, name,
                struct_j, struct_d, struct_,
                path) = (lookup(id+1, depth=args.get('depth')))
            if done:
                break

            if args.get('inner') or args.get('tree'):
                t_branches = [(0, trunk.weight)]
                changed = False
                for i, (x, px) in enumerate(
                        it.zip_longest(path[:-1], ppath[:-1])):
                    if x is None:
                        break

                    (id_, w_, rbyd_, rid_, tag_,
                        name_j_, name_d_, name_,
                        struct_j_, struct_d_, struct__) = x
                    t_branches.append((id_-(w_-1), id_+1))

                    if args.get('inner'):
                        # only show inner nodes where the path differs
                        # from the previous entry's path
                        if not (changed or px is None or x != px):
                            continue
                        changed = True

                        # show human-readable representation
                        print('%10s %s%*s %-8s %-22s %s' % (
                            '%04x.%04x:' % (rbyd_.block, rbyd_.limit)
                                if prbyd is None or rbyd_ != prbyd
                                else '',
                            t_repr(id_, w_, i) if args.get('tree') else '',
                            w_width, '%d-%d' % (id_-(w_-1), id_)
                                if w_ > 1 else id_
                                if w_ > 0 else '',
                            ''.join(
                                b if b >= ' ' and b <= '~' else '.'
                                for b in map(chr, name_)),
                            tagrepr(tag_, rid_, len(struct__), None),
                            next(xxd(struct__, 8), '')
                                if not args.get('no_truncate') else ''))

                        # show in-device representation
                        if args.get('device'):
                            print('%10s %*s%*s %8s %-22s%s' % (
                                '',
                                t_width, '',
                                w_width, '',
                                '',
                                '%04x %08x %07x' % (
                                    tag_, 0xffffffff & rid_, len(struct__)),
                                '  %s' % ' '.join(
                                    '%08x' % struct.unpack('<I',
                                        rbyd_.data[struct_j_+struct_d_+i*4
                                            : struct_j_+struct_d_
                                                + min(i*4+4,len(struct__))]
                                            .ljust(4, b'\0'))
                                    for i in range(
                                        min(m.ceil(len(struct__)/4), 3)))[:23]))

                        # show on-disk encoding of tags/data
                        for j, d, data in [
                                (name_j_, name_d_, name_),
                                (struct_j_, struct_d_, struct__)]:
                            if args.get('raw'):
                                for o, line in enumerate(
                                        xxd(rbyd_.data[j:j+d])):
                                    print('%9s: %s' % (
                                        '%04x' % (j + o*16),
                                        line))

                            # show on-disk encoding of tags
                            if args.get('raw') or args.get('no_truncate'):
                                for o, line in enumerate(xxd(data)):
                                    print('%9s: %s' % (
                                        '%04x' % (j+d + o*16),
                                        line))

                        # prbyd here means the last rendered rbyd, we update
                        # here to always print interleaved addresses
                        prbyd = rbyd_

            # corrupted? try to keep printing the tree
            if not rbyd:
                print('%04x.%04x: %s%s%s%s' % (
                    rbyd.block, rbyd.limit,
                    t_repr(id, w) if args.get('tree') else '',
                    '\x1b[31m' if color else '',
                    '(corrupted rbyd 0x%x.%x)' % (rbyd.block, rbyd.limit),
                    '\x1b[m' if color else ''))

                prbyd = rbyd
                ppath = path
                corrupted = True
                continue

            # if we're not showing inner nodes, prefer names higher in the tree
            # since this avoids showing vestigial names
            if not args.get('inner'):
                for (id_, w_, rbyd_, rid_, tag_,
                        name_j_, name_d_, name_,
                        struct_j_, struct_d_, struct__) in reversed(path):
                    name_j, name_d, name = name_j_, name_d_, name_
                    if rid_-(w_-1) != 0:
                        break

            # show human-readable representation
            print('%10s %s%*s %-8s %-22s %s' % (
                '%04x.%04x:' % (rbyd.block, rbyd.limit)
                    if prbyd is None or rbyd != prbyd
                    else '',
                t_repr(id, w) if args.get('tree') else '',
                w_width, '%d-%d' % (id-(w-1), id)
                    if w > 1 else id
                    if w > 0 else '',
                ''.join(
                    b if b >= ' ' and b <= '~' else '.'
                    for b in map(chr, name)),
                tagrepr(tag, rid, len(struct_), None),
                next(xxd(struct_, 8), '')
                    if not args.get('no_truncate') else ''))

            # show in-device representation
            if args.get('device'):
                print('%10s %*s%*s %8s %-22s%s' % (
                    '',
                    t_width, '',
                    w_width, '',
                    '',
                    '%04x %08x %07x' % (
                        tag, 0xffffffff & rid, len(struct_)),
                    '  %s' % ' '.join(
                        '%08x' % struct.unpack('<I',
                            rbyd.data[struct_j+struct_d+i*4
                                : struct_j+struct_d
                                    + min(i*4+4,len(struct_))]
                                .ljust(4, b'\0'))
                        for i in range(
                            min(m.ceil(len(struct_)/4), 3)))[:23]))

            # show on-disk encoding of tags/data
            for j, d, data in [
                    (name_j, name_d, name),
                    (struct_j, struct_d, struct_)]:
                if args.get('raw'):
                    for o, line in enumerate(xxd(rbyd.data[j:j+d])):
                        print('%9s: %s' % (
                            '%04x' % (j + o*16),
                            line))

                # show on-disk encoding of tags
                if args.get('raw') or args.get('no_truncate'):
                    for o, line in enumerate(xxd(data)):
                        print('%9s: %s' % (
                            '%04x' % (j+d + o*16),
                            line))

            prbyd = rbyd
            ppath = path

    if args.get('error_on_corrupt') and corrupted:
        sys.exit(2)
if __name__ == "__main__":
    import argparse
    import sys

    # integers may use any base prefix understood by int(x, 0), this is
    # kept as a lambda so argparse names the type the same way in errors
    anyint = lambda x: int(x, 0)

    parser = argparse.ArgumentParser(
        description="Debug rbyd B-trees.",
        allow_abbrev=False)

    # positional arguments
    parser.add_argument('disk',
        help="File containing the block device.")
    parser.add_argument('trunk', nargs='?', type=blocklim,
        help="Block address of the trunk of the tree.")

    # geometry
    parser.add_argument('-B', '--block-size', type=anyint,
        help="Block size in bytes.")
    parser.add_argument('-L', '--limit', type=anyint,
        help="Rbyd limit of the trunk of the tree (alias).")

    # output options
    parser.add_argument('--color',
        choices=['never', 'always', 'auto'], default='auto',
        help="When to use terminal colors. Defaults to 'auto'.")
    parser.add_argument('-r', '--raw', action='store_true',
        help="Show the raw data including tag encodings.")
    parser.add_argument('-x', '--device', action='store_true',
        help="Show the device-side representation of tags.")
    parser.add_argument('-T', '--no-truncate', action='store_true',
        help="Don't truncate, show the full contents.")
    parser.add_argument('-i', '--inner', action='store_true',
        help="Show inner branches.")
    parser.add_argument('-t', '--tree', action='store_true',
        help="Show the underlying B-tree.")
    parser.add_argument('-Z', '--depth', type=anyint,
        help="Depth of tree to show.")
    parser.add_argument('-e', '--error-on-corrupt', action='store_true',
        help="Error if B-tree is corrupt.")

    # drop anything that wasn't provided so main's defaults apply
    parsed = vars(parser.parse_intermixed_args())
    provided = {
        key: value
        for key, value in parsed.items()
        if value is not None}
    sys.exit(main(**provided))