Files
littlefs/scripts/dbgrbyd.py
Christopher Haster 98532f3287 Adding sparse ids to rbyd trees
The way sparse ids interact with our flat id+attr tree is a bit wonky.

Normally, with weighted trees, one entry is associated with one weight.
But since our rbyd trees use id+attr pairs as keys, in theory each set of
id+attr pairs should share a single weight.

  +-+-+-+-> id0,attr0   -.
  | | | '-> id0,attr1    +- weight 5
  | | '-+-> id0,attr2   -'
  | |   |
  | |   '-> id5,attr0   -.
  | '-+-+-> id5,attr1    +- weight 5
  |   | '-> id5,attr2   -'
  |   |
  |   '-+-> id10,attr0  -.
  |     '-> id10,attr1   +- weight 5
  '-------> id10,attr2  -'

To make this representable, we could give a single id+attr pair the
weight, and make the other attrs have a weight of zero. In our current
scheme, attr0 (actually LFSR_TAG_MK) is the only attr required for every
id, and it has the benefit of being the first attr found during
traversal. So it is the obvious choice for storing the id's effective weight.

But there's still some trickiness. Keep in mind our ids are derived from
the weights in the rbyd tree. So if we follow intuition and implement this naively:

  +-+-+-+-> id0,attr0   weight 5
  | | | '-> id5,attr1   weight 0
  | | '-+-> id5,attr2   weight 0
  | |   |
  | |   '-> id5,attr0   weight 5
  | '-+-+-> id10,attr1  weight 0
  |   | '-> id10,attr2  weight 0
  |   |
  |   '-+-> id10,attr0  weight 5
  |     '-> id15,attr1  weight 0
  '-------> id15,attr2  weight 0

Suddenly the ids in the attr sets don't match!

It may be possible to work around this with special cases for attr0, but
this would complicate the code and make the presence of attr0 a strict
requirement.

Instead, if we associate each attr set with not the smallest id in the
weight but the largest id in the weight, so id' = id+(weight-1), then
our requirements work out while still keeping each attr set on the same
low-level id:

  +-+-+-+-> id4,attr0   weight 5
  | | | '-> id4,attr1   weight 0
  | | '-+-> id4,attr2   weight 0
  | |   |
  | |   '-> id9,attr0   weight 5
  | '-+-+-> id9,attr1   weight 0
  |   | '-> id9,attr2   weight 0
  |   |
  |   '-+-> id14,attr0  weight 5
  |     '-> id14,attr1  weight 0
  '-------> id14,attr2  weight 0

To be blunt, this is unintuitive, and I'm worried it may be its own
source of complexity/bugs. But this representation does solve the problem
at hand, so I'm just going to see how it works out.
2023-03-17 14:19:49 -05:00

740 lines
25 KiB
Python
Executable File

#!/usr/bin/env python3
import bisect
import itertools as it
import math as m
import os
import struct
# ANSI SGR foreground color codes, handed out round-robin to ids in the
# lifetime margin of the log view so neighboring ids are distinguishable
COLORS = [
    '34',  # blue
    '31',  # red
    '32',  # green
    '35',  # purple
    '33',  # yellow
    '36',  # cyan
]
def crc32c(data, crc=0):
    """Return the CRC-32C of data, continuing from a previous crc.

    The computation is bitwise and reflected, using the polynomial
    0x82f63b78. The state is inverted on entry and on exit, so the
    result of one call can be passed as crc to the next call to
    checksum data incrementally.
    """
    poly = 0x82f63b78
    state = crc ^ 0xffffffff
    for byte in data:
        state ^= byte
        # shift out one bit at a time, folding in the polynomial
        # whenever a set bit falls off the end
        for _ in range(8):
            if state & 1:
                state = (state >> 1) ^ poly
            else:
                state >>= 1
    return state ^ 0xffffffff
def fromleb128(data):
    """Decode one little-endian base-128 integer from the front of data.

    Returns (value, consumed), where value is truncated to 32 bits and
    consumed is the number of bytes read. Decoding stops at the first
    byte without the continuation bit (0x80), or at the end of data if
    no such byte exists.
    """
    value = 0
    shift = 0
    consumed = 0
    for byte in data:
        value = (value | ((byte & 0x7f) << shift)) & 0xffffffff
        shift += 7
        consumed += 1
        # last byte of this integer?
        if not byte & 0x80:
            break
    return value, consumed
def fromtag(data):
    """Decode a tag from the front of data.

    A tag is three consecutive leb128 integers: tag, id, size. Returns
    (v, tag, id, size, delta), where v is the lowest bit of the encoded
    tag, tag is the encoded tag with that bit cleared, id is the encoded
    id minus one, and delta is the total number of bytes consumed.
    """
    tag, tag_len = fromleb128(data)
    rest = data[tag_len:]
    raw_id, id_len = fromleb128(rest)
    size, size_len = fromleb128(rest[id_len:])
    return tag & 1, tag & ~1, raw_id - 1, size, tag_len + id_len + size_len
def popc(x):
    """Return the number of '1' digits in the binary representation of x."""
    return format(x, 'b').count('1')
def xxd(data, width=16, crc=False):
    """Yield hexdump lines for data, width bytes per line.

    Each line is the space-separated hex bytes, left-justified to
    3*width columns, followed by a space and the same bytes as text,
    left-justified to width columns. Bytes outside the printable ASCII
    range ' '..'~' are shown as '.'. The crc parameter is accepted but
    not used.
    """
    for start in range(0, len(data), width):
        row = data[start:start+width]
        hexes = ' '.join('%02x' % b for b in row)
        text = ''.join(
            chr(b) if 0x20 <= b <= 0x7e else '.'
            for b in row)
        yield '%-*s %-*s' % (3*width, hexes, width, text)
def tagrepr(tag, id, w, size, off=None):
    """Return a short human-readable description of a decoded tag.

    tag, id and size are the fields as returned by fromtag. w is an
    optional weight, shown only for mk* tags when it is not None. off is
    the optional offset of the tag itself, used only for alt pointers:
    when given, the jump target is shown as an absolute offset
    (off-size), otherwise as the relative jump -size.
    """
    if (tag & ~0x3f0) == 0x0400:
        mktype = (tag & 0x3f0) >> 4
        return 'mk%s id%d%s %d' % (
            'branch' if mktype == 0x00
                else 'reg' if mktype == 0x01
                else 'dir' if mktype == 0x02
                else ' 0x%02x' % mktype,
            id,
            ' w%d' % w if w is not None else '',
            size)
    elif (tag & ~0xff2) == 0x2000:
        return '%suattr 0x%02x%s%s' % (
            'rm' if tag & 0x2 else '',
            (tag & 0xff0) >> 4,
            ' id%d' % id if id != -1 else '',
            ' %d' % size if not tag & 0x2 or size else '')
    elif tag == 0x0006:
        return 'grow id%d w%d' % (
            id,
            size)
    elif tag == 0x0016:
        return 'shrink id%d w%d' % (
            id,
            size)
    elif (tag & ~0x10) == 0x0004:
        return 'crc%x%s %d' % (
            1 if tag & 0x10 else 0,
            ' 0x%02x' % id if id != -1 else '',
            size)
    elif tag == 0x0024:
        return 'fcrc%s %d' % (
            ' 0x%02x' % id if id != -1 else '',
            size)
    elif tag & 0x8:
        # alt pointers store their weight in the id field and their
        # backwards jump in the size field
        return 'alt%s%s 0x%x w%d %s' % (
            'r' if tag & 0x2 else 'b',
            'gt' if tag & 0x4 else 'le',
            tag & 0x3ff0,
            id+1,
            '0x%x' % (0xffffffff & (off-size))
                if off is not None
                # without an absolute offset all we can show is the
                # relative jump, note off is None here so it can't be
                # formatted
                else '-%d' % size)
    else:
        return '0x%04x id%d %d' % (tag, id, size)
def show_log(block_size, data, rev, off, *,
        color=False,
        **args):
    """Print the tags in data in the order they appear on disk.

    Parsing starts at offset 4, after the 4-byte revision count, and
    stops at off, or at block_size if args['all'] is set. Each tag is
    checked against the running crc and its parity bit; mismatches are
    printed as notes next to the tag. rev is accepted but not used.

    Recognized args: all, jumps, lifetimes, raw, device, no_truncate.
    With color set, ANSI escapes are emitted and tags at or past off
    are greyed out.
    """
    crc = crc32c(data[0:4])
    # where to stop parsing, this does not change while we print
    end = block_size if args.get('all') else off

    # preprocess jumps
    if args.get('jumps'):
        jumps = []
        j_ = 4
        while j_ < end:
            j = j_
            v, tag, id, size, delta = fromtag(data[j_:])
            j_ += delta
            # only non-alt tags carry data after the tag
            if (tag & 0xe) <= 0x4:
                j_ += size

            if tag & 0x8:
                # figure out which alt color
                if tag & 0x2:
                    _, ntag, _, _, _ = fromtag(data[j_:])
                    if ntag & 0x2:
                        jumps.append((j, j-size, 0, 'y'))
                    else:
                        jumps.append((j, j-size, 0, 'r'))
                else:
                    jumps.append((j, j-size, 0, 'b'))

        # figure out x-offsets to avoid collisions between jumps
        for j in range(len(jumps)):
            a, b, _, c = jumps[j]
            x = 0
            while any(
                    max(a, b) >= min(a_, b_)
                    and max(a_, b_) >= min(a, b)
                    and x == x_
                    for a_, b_, x_, _ in jumps[:j]):
                x += 1
            jumps[j] = a, b, x, c

        def jumprepr(j):
            # render jumps
            chars = {}
            for a, b, x, c in jumps:
                c_start = (
                    '\x1b[33m' if color and c == 'y'
                    else '\x1b[31m' if color and c == 'r'
                    else '\x1b[90m' if color
                    else '')
                c_stop = '\x1b[m' if color else ''

                if j == a:
                    for x_ in range(2*x+1):
                        chars[x_] = '%s-%s' % (c_start, c_stop)
                    chars[2*x+1] = '%s\'%s' % (c_start, c_stop)
                elif j == b:
                    for x_ in range(2*x+1):
                        chars[x_] = '%s-%s' % (c_start, c_stop)
                    chars[2*x+1] = '%s.%s' % (c_start, c_stop)
                    chars[0] = '%s<%s' % (c_start, c_stop)
                elif j >= min(a, b) and j <= max(a, b):
                    chars[2*x+1] = '%s|%s' % (c_start, c_stop)

            return ''.join(chars.get(x, ' ')
                for x in range(max(chars.keys(), default=0)+1))

    # preprocess lifetimes
    if args.get('lifetimes'):
        # map an id to (index of the weight containing it, offset in
        # that weight), ids past the end map to the last weight
        def index(weights, id):
            for i, w in enumerate(weights):
                if id < w:
                    return i, id
                id -= w
            return len(weights)-1, -1

        # half-open id range [a, b) covered by each weight
        def ranges(weights):
            return zip(
                it.chain([0], it.accumulate(weights)),
                it.accumulate(weights))

        weights = [0]
        grow = None
        colors = ['']
        colors_i = 0
        lifetimes = [(0, 0, -1, weights.copy(), colors.copy())]
        j_ = 4
        while j_ < end:
            j = j_
            v, tag, id, size, delta = fromtag(data[j_:])
            j_ += delta
            if (tag & 0xe) <= 0x4:
                j_ += size

            # note these slices are also copying the arrays
            if grow is not None:
                if (tag & ~0x3f0) == 0x0400 and id == grow[1]:
                    # a grow followed by a mk* on the same id creates
                    # a new id, split the weight and give it a color
                    i, p = index(weights, id)
                    weights[i:i+1] = [p+1, weights[i]-(p+1)]
                    colors[i:i+1] = [COLORS[colors_i % len(COLORS)], colors[i]]
                    colors_i += 1
                    lifetimes.append((grow[0],
                        +1, id, weights[:-1], colors[:-1]))
                    grow = None
                elif not tag & 0x8:
                    # any other non-alt tag ends the pending grow, alts
                    # are skipped so the grow stays pending across them
                    lifetimes.append((grow[0],
                        0, grow[1], weights[:-1], colors[:-1]))
                    grow = None

            if tag == 0x0006:
                i, _ = index(weights, id)
                weights[i] += size
                grow = j, id
            elif tag == 0x0016:
                i, _ = index(weights, id)
                if weights[i] == size and len(weights) > 1:
                    lifetimes.append((j,
                        -1, id, weights[:-1], colors[:-1]))
                    weights[i:i+1] = []
                    colors[i:i+1] = []
                else:
                    weights[i] = max(weights[i] - size, 0)
                    lifetimes.append((j,
                        0, id-size, weights[:-1], colors[:-1]))
            elif not tag & 0x8:
                lifetimes.append((j,
                    0, id, weights[:-1], colors[:-1]))

        lifetimes_j = [j for j, _, _, _, _ in lifetimes]
        width = 2*max(len(weights) for _, _, _, weights, _ in lifetimes)

        def lifetimerepr(j):
            # find the most recent lifetime state at or before j
            j_, g, id, weights, colors = lifetimes[
                bisect.bisect(lifetimes_j, j)-1]
            # only decorate the exact tag that changed the state
            if j != j_:
                g, id = 0, -1

            return '%s%*s' % (
                ''.join(
                    '%s%s%s' % (
                        '\x1b[%sm' % c if color else '',
                        '.' if g > 0 and id >= a and id < b
                            else '\\ ' if g > 0 and id < a
                            else '\'' if g < 0 and id >= a and id < b
                            else '/ ' if g < 0 and id < a
                            # check the id range before looking at the
                            # enclosing tag, tag may not be bound yet
                            # if no tags have been parsed (header of an
                            # empty/corrupt block)
                            else '* ' if id >= a and id < b
                                and not tag & 0x8
                            else '| ',
                        '\x1b[m' if color else '')
                    for (a, b), c in zip(ranges(weights), colors)),
                width - 2*len(weights) + (1 if g else 0), '')

    # print header
    print('%-8s %s%-22s %s' % (
        'off',
        lifetimerepr(0) if args.get('lifetimes') else '',
        'tag',
        'data (truncated)'
            if not args.get('no_truncate') else ''))

    # print revision count
    if args.get('raw'):
        print('%8s: %s' % ('%04x' % 0, next(xxd(data[0:4]))))

    # print tags
    j_ = 4
    while j_ < end:
        notes = []

        j = j_
        v, tag, id, size, delta = fromtag(data[j_:])
        # the tag's low bit should match the parity of the crc so far
        if v != (popc(crc) & 1):
            notes.append('v!=%x' % (popc(crc) & 1))
        tag &= ~1
        crc = crc32c(data[j_:j_+delta], crc)
        j_ += delta

        if (tag & 0xe) <= 0x4:
            if (tag & ~0x10) != 0x04:
                crc = crc32c(data[j_:j_+size], crc)
            # found a crc?
            else:
                crc_, = struct.unpack('<I', data[j_:j_+4].ljust(4, b'\0'))
                if crc != crc_:
                    notes.append('crc!=%08x' % crc)
            j_ += size

        # show human-readable tag representation
        print('%s%08x:%s %s%s%-57s%s%s' % (
            '\x1b[90m' if color and j >= off else '',
            j,
            '\x1b[m' if color and j >= off else '',
            lifetimerepr(j) if args.get('lifetimes') else '',
            '\x1b[90m' if color and j >= off else '',
            '%-22s%s' % (
                tagrepr(tag, id, None, size, j),
                ' %s' % next(xxd(
                        data[j+delta:j+delta+min(size, 8)], 8), '')
                    if not args.get('no_truncate')
                        and (tag & 0xe) <= 0x4 else ''),
            '\x1b[m' if color and j >= off else '',
            ' (%s)' % ', '.join(notes) if notes
            else ' %s' % jumprepr(j)
                if args.get('jumps')
            else ''))

        if args.get('raw'):
            # show on-disk encoding of tags
            for o, line in enumerate(xxd(data[j:j+delta])):
                print('%s%8s: %s%s' % (
                    '\x1b[90m' if color and j >= off else '',
                    '%04x' % (j + o*16),
                    line,
                    '\x1b[m' if color and j >= off else ''))

        # show in-device representation, including some extra
        # crc/parity info
        if args.get('device'):
            print('%s%8s %s%-47s %08x %x%s' % (
                '\x1b[90m' if color and j >= off else '',
                '',
                lifetimerepr(0) if args.get('lifetimes') else '',
                '%-22s%s' % (
                    '%04x %08x %07x' % (tag, 0xffffffff & id, size),
                    ' %s' % ' '.join(
                            '%08x' % struct.unpack('<I',
                                data[j+delta+i*4:j+delta+min(i*4+4,size)]
                                    .ljust(4, b'\0'))
                            for i in range(min(m.ceil(size/4), 3)))[:23]
                        if (tag & 0xe) <= 0x4 else ''),
                crc,
                popc(crc) & 1,
                '\x1b[m' if color and j >= off else ''))

        if (tag & 0xe) <= 0x4:
            # show on-disk encoding of data
            if args.get('raw') or args.get('no_truncate'):
                for o, line in enumerate(xxd(data[j+delta:j+delta+size])):
                    print('%s%8s: %s%s' % (
                        '\x1b[90m' if color and j >= off else '',
                        '%04x' % (j+delta + o*16),
                        line,
                        '\x1b[m' if color and j >= off else ''))
def show_tree(block_size, data, rev, trunk, weight, *,
        color=False,
        **args):
    """Print the tags reachable from trunk, in key order.

    The tree rooted at offset trunk is walked by repeatedly looking up
    the smallest key after the previously found (id, tag) until the
    lookup reports the end of the tree. weight is the total weight of
    the tree and bounds the id range. Nothing is printed if trunk is
    None. block_size and rev are accepted but not used.

    Recognized args: tree, raw, device, no_truncate. With args['tree']
    the search paths taken through the alt pointers are rendered as a
    tree in the margin, colored if color is set.
    """
    if trunk is None:
        return

    # lookup a tag, returning also the search path for decoration
    # purposes
    def lookup(tag, id):
        lower = -1
        upper = weight
        path = []

        # descend down tree
        j = trunk
        while True:
            _, alt, weight_, jump, delta = fromtag(data[j:])

            # found an alt?
            if alt & 0x8:
                # fromtag subtracts one from the id field, but for alts
                # this field holds the weight
                weight_ += 1
                # follow?
                if ((id, tag & ~0xf) > (upper-weight_-1, alt & ~0xf)
                        if alt & 0x4
                        else ((id, tag & ~0xf) <= (lower+weight_, alt & ~0xf))):
                    lower += upper-lower-1-weight_ if alt & 0x4 else 0
                    upper -= upper-lower-1-weight_ if not alt & 0x4 else 0
                    j = j - jump

                    if args.get('tree'):
                        # figure out which color
                        if alt & 0x2:
                            _, nalt, _, _, _ = fromtag(data[j+jump+delta:])
                            if nalt & 0x2:
                                path.append((j+jump, j, 'y'))
                            else:
                                path.append((j+jump, j, 'r'))
                        else:
                            path.append((j+jump, j, 'b'))
                # stay on path
                else:
                    lower += weight_ if not alt & 0x4 else 0
                    upper -= weight_ if alt & 0x4 else 0
                    j = j + delta

                    if args.get('tree'):
                        # figure out which color
                        if alt & 0x2:
                            _, nalt, _, _, _ = fromtag(data[j:])
                            if nalt & 0x2:
                                path.append((j-delta, j, 'y'))
                            else:
                                path.append((j-delta, j, 'r'))
                        else:
                            path.append((j-delta, j, 'b'))
            # found tag
            else:
                tag_ = alt
                id_ = upper-1
                w_ = id_-lower

                # we're done if we ended up before the key we asked
                # for, or on a tag with bit 0x2 set
                done = (id_, tag_) < (id, tag) or tag_ & 2

                return done, tag_, id_, w_, j, delta, jump, path

    # precompute tree
    if args.get('tree'):
        tags = []
        paths = {}

        tag, id = 0, -1
        while True:
            done, tag, id, w, j, delta, size, path = lookup(tag+0x10, id)
            # found end of tree?
            if done:
                break

            tags.append((j, tag, id))
            for x, (a, b, c) in enumerate(path):
                paths[a, b, x] = c

        # align paths to nearest tag
        tags.sort()
        paths = {(
                tags[bisect.bisect_left(tags, (a, 0, -1), hi=len(tags)-1)],
                tags[bisect.bisect_left(tags, (b, 0, -1), hi=len(tags)-1)],
                x): c for (a, b, x), c in paths.items()}

        # also find the maximum depth
        depth = max((x+1 for _, _, x in paths.keys()), default=0)

        def treerepr(j):
            if depth == 0:
                return ''

            _, tag, id = tags[bisect.bisect_left(
                tags, (j, 0, -1), hi=len(tags)-1)]

            def c_start(c):
                return ('\x1b[33m' if color and c == 'y'
                    else '\x1b[31m' if color and c == 'r'
                    else '\x1b[90m' if color
                    else '')

            def c_stop(c):
                return '\x1b[m' if color else ''

            path = []
            seen = None
            for x in range(depth):
                # does a branch at this depth start on this tag?
                if any(x == x_ and tag == a_tag and id == a_id
                        for (_, a_tag, a_id), _, x_ in paths.keys()):
                    c = next(c
                        for ((_, a_tag, a_id), _, x_), c in paths.items()
                        if x == x_ and tag == a_tag and id == a_id)
                    path.append('%s+%s' % (c_start(c), c_stop(c)))
                # does a branch at this depth end on this tag?
                elif any(x == x_ and tag == b_tag and id == b_id
                        for _, (_, b_tag, b_id), x_ in paths.keys()):
                    a_tag, a_id, c = next((a_tag, a_id, c)
                        for ((_, a_tag, a_id), (_, b_tag, b_id), x_), c
                        in paths.items()
                        if x == x_ and tag == b_tag and id == b_id)
                    if (a_id, a_tag) < (id, tag):
                        path.append('%s\'%s' % (c_start(c), c_stop(c)))
                    else:
                        path.append('%s.%s' % (c_start(c), c_stop(c)))
                # does a branch at this depth pass by this tag?
                elif any(x == x_
                        and (id, tag) >= min((a_id, a_tag), (b_id, b_tag))
                        and (id, tag) <= max((a_id, a_tag), (b_id, b_tag))
                        for (_, a_tag, a_id), (_, b_tag, b_id), x_
                        in paths.keys()):
                    c = next(c
                        for ((_, a_tag, a_id), (_, b_tag, b_id), x_), c
                        in paths.items()
                        if x == x_
                            and (id, tag) >= min((a_id, a_tag), (b_id, b_tag))
                            and (id, tag) <= max((a_id, a_tag), (b_id, b_tag)))
                    path.append('%s|%s' % (c_start(c), c_stop(c)))
                elif seen:
                    path.append('%s-%s' % (c_start(seen), c_stop(seen)))
                else:
                    path.append(' ')

                if any(x == x_ and tag == b_tag and id == b_id
                        for _, (_, b_tag, b_id), x_ in paths.keys()):
                    c = next(c
                        for (_, (_, b_tag, b_id), x_), c in paths.items()
                        if x == x_ and tag == b_tag and id == b_id)
                    seen = c

                if seen and x == depth-1:
                    path.append('%s>%s' % (c_start(seen), c_stop(seen)))
                elif seen:
                    path.append('%s-%s' % (c_start(seen), c_stop(seen)))
                else:
                    path.append(' ')

            return ' %s' % ''.join(path)

    # width of the tree column, treerepr renders a leading space plus
    # two characters per level, or nothing if there is no tree to show
    t_width = 2*depth+1 if args.get('tree') and depth > 0 else 0

    # print header
    print('%-8s %*s%-22s %s' % (
        'off',
        t_width, '',
        'tag',
        'data (truncated)'
            if not args.get('no_truncate') else ''))

    tag, id = 0, -1
    while True:
        done, tag, id, w, j, delta, size, path = lookup(tag+0x10, id)
        # found end of tree?
        if done:
            break

        # show human-readable tag representation
        print('%08x:%s %-57s' % (
            j,
            treerepr(j) if args.get('tree') else '',
            '%-22s%s' % (
                tagrepr(tag, id, w, size, j),
                ' %s' % next(xxd(
                        data[j+delta:j+delta+min(size, 8)], 8), '')
                    if not args.get('no_truncate')
                        and (tag & 0xe) <= 0x4 else '')))

        if args.get('raw'):
            # show on-disk encoding of tags
            for o, line in enumerate(xxd(data[j:j+delta])):
                print('%8s: %s' % (
                    '%04x' % (j + o*16),
                    line))

        # show in-device representation
        if args.get('device'):
            # pad past the tree column, there is no lifetime column in
            # the tree view so there is nothing else to render here
            print('%8s %*s%-47s' % (
                '',
                t_width, '',
                '%-22s%s' % (
                    '%04x %08x %07x' % (tag, 0xffffffff & id, size),
                    ' %s' % ' '.join(
                            '%08x' % struct.unpack('<I',
                                data[j+delta+i*4:j+delta+min(i*4+4,size)]
                                    .ljust(4, b'\0'))
                            for i in range(min(m.ceil(size/4), 3)))[:23]
                        if (tag & 0xe) <= 0x4 else '')))

        if (tag & 0xe) <= 0x4:
            # show on-disk encoding of data
            if args.get('raw') or args.get('no_truncate'):
                for o, line in enumerate(xxd(data[j+delta:j+delta+size])):
                    print('%8s: %s' % (
                        '%04x' % (j+delta + o*16),
                        line))
def main(disk, block_size=None, block1=0, block2=None, *,
        trunk=None,
        color='auto',
        **args):
    """Read up to two metadata blocks from disk and print one of them.

    disk is the path of the file containing the block device. If
    block_size is None the whole file is treated as one block. block1
    and block2 are block addresses; block2 may be None. Each block is
    scanned for its last commit with a valid crc, one block is selected
    by comparing revision counts, and its contents are printed with
    show_log if args['log'] is set, otherwise with show_tree.

    trunk overrides the trunk offset found while scanning. color is one
    of 'auto', 'always', or anything else for never. If
    args['error_on_corrupt'] is set and the selected block has no valid
    commit, the process exits with status 2.
    """
    # imported here so main also works when this file is imported as a
    # module, the file only imports sys under its __main__ guard
    import sys

    # figure out what color should be
    if color == 'auto':
        color = sys.stdout.isatty()
    elif color == 'always':
        color = True
    else:
        color = False

    with open(disk, 'rb') as f:
        # if block_size is omitted, assume the block device is one big block
        if block_size is None:
            f.seek(0, os.SEEK_END)
            block_size = f.tell()

        # read each block
        blocks = [block for block in [block1, block2] if block is not None]
        datas = []
        for block in blocks:
            f.seek(block * block_size)
            datas.append(f.read(block_size))

    # first figure out which block has the most recent revision
    #
    # fetch scans one block and returns (rev, off, trunk, weight) as of
    # the last commit with a valid crc, off is 0 if there is none
    def fetch(data):
        rev, = struct.unpack('<I', data[0:4].ljust(4, b'\0'))
        crc = crc32c(data[0:4])
        off = 0
        j_ = 4
        trunk = None
        trunk_ = None
        weight = 0
        weight_ = 0
        wastrunk = False
        while j_ < block_size:
            v, tag, id, size, delta = fromtag(data[j_:])
            # stop at the first tag with the wrong parity bit
            if v != (popc(crc) & 1):
                break
            crc = crc32c(data[j_:j_+delta], crc)
            j_ += delta

            # find trunk
            #
            # a candidate trunk is the first tag of each run of alts
            # plus the tag the run ends in, crc tags don't count
            if not wastrunk and (tag & 0xe) != 0x4:
                trunk_ = j_ - delta
            wastrunk = not not tag & 0x8

            # keep track of weight
            if tag == 0x0006:
                weight_ += size
            elif tag == 0x0016:
                weight_ = max(weight_ - size, 0)

            # take care of crcs
            if (tag & 0xe) <= 0x4:
                if (tag & ~0x10) != 0x04:
                    crc = crc32c(data[j_:j_+size], crc)
                # found a crc?
                else:
                    crc_, = struct.unpack('<I', data[j_:j_+4].ljust(4, b'\0'))
                    if crc != crc_:
                        break

                    # commit what we have
                    off = j_ + size
                    trunk = trunk_
                    weight = weight_
                j_ += size

        return rev, off, trunk, weight

    revs, offs, trunks, weights = [], [], [], []
    i = 0
    for block, data in zip(blocks, datas):
        rev, off, trunk_, weight = fetch(data)
        revs.append(rev)
        offs.append(off)
        trunks.append(trunk_)
        weights.append(weight)

        # compare with sequence arithmetic
        #
        # NOTE(review): this switches to the new block when rev-revs[i]
        # is negative, which looks like it selects the older revision,
        # confirm against the on-disk revision ordering before changing
        if off and ((rev - revs[i]) & 0x80000000):
            i = len(revs)-1

    # print contents of the winning metadata block
    block, data, rev, off, trunk, weight = (
        blocks[i], datas[i], revs[i], offs[i],
        trunk if trunk is not None else trunks[i],
        weights[i])

    # with two blocks ~i indexes the other one (~0 == -1, ~1 == -2)
    print('mdir 0x%x, rev %d, size %d, weight %d%s' % (
        block, rev, off, weight,
        ' (was 0x%x, %d, %d, %d)' % (
            blocks[~i], revs[~i], offs[~i], weights[~i])
            if len(blocks) > 1 else ''))

    if args.get('log'):
        show_log(block_size, data, rev, off,
            color=color,
            **args)
    else:
        show_tree(block_size, data, rev, trunk, weight,
            color=color,
            **args)

    if args.get('error_on_corrupt') and off == 0:
        sys.exit(2)
if __name__ == "__main__":
    import argparse
    import sys

    # base 0 lets int infer the base from the literal's prefix, this
    # stays a lambda so argparse's error messages are unchanged
    anyint = lambda x: int(x, 0)

    parser = argparse.ArgumentParser(
        description="Debug rbyd metadata.",
        allow_abbrev=False)

    # positional arguments, only the disk is required
    parser.add_argument(
        'disk',
        help="File containing the block device.")
    for name, help_ in [
            ('block_size', "Block size in bytes."),
            ('block1', "Block address of the first metadata block."),
            ('block2', "Block address of the second metadata block.")]:
        parser.add_argument(
            name,
            nargs='?',
            type=anyint,
            help=help_)

    # options that take a value
    parser.add_argument(
        '--trunk',
        type=anyint,
        help="Use this offset as the trunk of the tree.")
    parser.add_argument(
        '--color',
        choices=['never', 'always', 'auto'],
        default='auto',
        help="When to use terminal colors. Defaults to 'auto'.")

    # boolean flags
    for short, long_, help_ in [
            ('-a', '--all',
                "Don't stop parsing on bad commits."),
            ('-l', '--log',
                "Show the raw tags as they appear in the log."),
            ('-r', '--raw',
                "Show the raw data including tag encodings."),
            ('-x', '--device',
                "Show the device-side representation of tags."),
            ('-T', '--no-truncate',
                "Don't truncate, show the full contents."),
            ('-t', '--tree',
                "Show the rbyd tree."),
            ('-j', '--jumps',
                "Show alt pointer jumps in the margin."),
            ('-g', '--lifetimes',
                "Show inserts/deletes of ids in the margin."),
            ('-e', '--error-on-corrupt',
                "Error if no valid commit is found.")]:
        parser.add_argument(
            short, long_,
            action='store_true',
            help=help_)

    # drop anything left unset so main's own defaults apply
    parsed = vars(parser.parse_intermixed_args())
    kwargs = {k: v for k, v in parsed.items() if v is not None}
    sys.exit(main(**kwargs))