littlefs/scripts/dbgrbyd.py

#!/usr/bin/env python3

# prevent local imports
#
# when run as a script, drop the first sys.path entry (the directory
# containing the script) so files sitting next to this script can't shadow
# the modules imported below
if __name__ == "__main__":
    __import__('sys').path.pop(0)

import bisect
import collections as co
import itertools as it
import math as mt
import os
import struct


# color codes, each named by its trailing comment
#
# NOTE(review): these look like ANSI SGR foreground codes (as in
# '\x1b[34m'), presumably a palette to pick from when coloring output, no
# use is visible in this part of the file, confirm against the callers
COLORS = [
    '34',   # blue
    '31',   # red
    '32',   # green
    '35',   # purple
    '33',   # yellow
    '36',   # cyan
]


# tag encodings
#
# each trailing comment gives the tag's hex pattern followed by its bit
# layout, lines marked with ## appear to introduce a group of tags sharing
# the same upper bits (tagrepr matches these groups with a mask), and lines
# marked with a single # are specific tags inside the preceding group
#
# in the bit layouts, v is the top bit of the 16-bit tag word, which
# fromtag splits off from the 15-bit tag
#
# TAG_B/TAG_R, TAG_LE/TAG_GT and TAG_P carry no pattern comment, tagrepr
# tests TAG_R and TAG_GT on alt tags and TAG_P on cksum tags
#
# NOTE(review): for layouts ending in rr the assigned value can differ
# from the listed pattern in those low bits (TAG_GEOMETRY, TAG_MROOT and
# TAG_MDIR are one more than their listed patterns), presumably
# intentional, confirm before relying on the comments
TAG_NULL        = 0x0000    ## 0x0000  v--- ---- ---- ----
TAG_CONFIG      = 0x0000    ## 0x00tt  v--- ---- -ttt tttt
TAG_MAGIC       = 0x0003    #  0x0003  v--- ---- ---- --11
TAG_VERSION     = 0x0004    #  0x0004  v--- ---- ---- -1--
TAG_RCOMPAT     = 0x0005    #  0x0005  v--- ---- ---- -1-1
TAG_WCOMPAT     = 0x0006    #  0x0006  v--- ---- ---- -11-
TAG_OCOMPAT     = 0x0007    #  0x0007  v--- ---- ---- -111
TAG_GEOMETRY    = 0x0009    #  0x0008  v--- ---- ---- 1-rr
TAG_NAMELIMIT   = 0x000c    #  0x000c  v--- ---- ---- 11--
TAG_FILELIMIT   = 0x000d    #  0x000d  v--- ---- ---- 11-1
TAG_GDELTA      = 0x0100    ## 0x01tt  v--- ---1 -ttt tttt
TAG_GRMDELTA    = 0x0100    #  0x0100  v--- ---1 ---- ----
TAG_NAME        = 0x0200    ## 0x02tt  v--- --1- -ttt tttt
TAG_REG         = 0x0201    #  0x0201  v--- --1- ---- ---1
TAG_DIR         = 0x0202    #  0x0202  v--- --1- ---- --1-
TAG_BOOKMARK    = 0x0204    #  0x0204  v--- --1- ---- -1--
TAG_STICKYNOTE  = 0x0205    #  0x0205  v--- --1- ---- -1-1
TAG_STRUCT      = 0x0300    ## 0x03tt  v--- --11 -ttt tttt
TAG_DATA        = 0x0300    #  0x0300  v--- --11 ---- ----
TAG_BLOCK       = 0x0304    #  0x0304  v--- --11 ---- -1rr
TAG_BSHRUB      = 0x0308    #  0x0308  v--- --11 ---- 1---
TAG_BTREE       = 0x030c    #  0x030c  v--- --11 ---- 11rr
TAG_MROOT       = 0x0311    #  0x0310  v--- --11 ---1 --rr
TAG_MDIR        = 0x0315    #  0x0314  v--- --11 ---1 -1rr
TAG_MTREE       = 0x031c    #  0x031c  v--- --11 ---1 11rr
TAG_DID         = 0x0320    #  0x0320  v--- --11 --1- ----
TAG_BRANCH      = 0x032c    #  0x032c  v--- --11 --1- 11rr
TAG_ATTR        = 0x0400    ## 0x04aa  v--- -1-a -aaa aaaa
TAG_UATTR       = 0x0400    #  0x04aa  v--- -1-- -aaa aaaa
TAG_SATTR       = 0x0500    #  0x05aa  v--- -1-1 -aaa aaaa
TAG_SHRUB       = 0x1000    ## 0x1kkk  v--1 kkkk -kkk kkkk
TAG_ALT         = 0x4000    ## 0x4kkk  v1cd kkkk -kkk kkkk
TAG_B           = 0x0000
TAG_R           = 0x2000
TAG_LE          = 0x0000
TAG_GT          = 0x1000
TAG_CKSUM       = 0x3000    ## 0x300p  v-11 ---- ---- ---p
TAG_P           = 0x0001
TAG_NOTE        = 0x3100    ## 0x3100  v-11 ---1 ---- ----
TAG_ECKSUM      = 0x3200    ## 0x3200  v-11 --1- ---- ----
TAG_GCKSUMDELTA = 0x3300    ## 0x3300  v-11 --11 ---- ----


# parse a block geometry string into either one int or a pair of ints
#
# 512      -> 512
# 512x16   -> (512, 16)
# 0x200x10 -> (512, 16)
#
# a leading 0x/0o/0b prefix (either case) selects the radix used for every
# number in the string, and the first 'x' after the prefix separates the
# two numbers
def bdgeom(s):
    text = s.strip()

    # peel off an optional radix prefix, decimal otherwise
    radix = 10
    for prefix, radix_ in (('0x', 16), ('0o', 8), ('0b', 2)):
        if text.startswith((prefix, prefix.upper())):
            text = text[2:]
            radix = radix_
            break

    # no separator means a single number
    first, sep, second = text.partition('x')
    if not sep:
        return int(text, radix)

    return (int(first, radix), int(second, radix))

# TODO sync across scripts
# parse some rbyd addr encodings, always returning a list
#
# 0xa       -> [0xa]
# 0xa.c     -> [(0xa, 0xc)]
# 0x{a,b}   -> [0xa, 0xb]
# 0x{a,b}.c -> [(0xa, 0xc), (0xb, 0xc)]
#
# a leading 0x/0o/0b prefix (either case) selects the radix used for every
# number in the string
def rbydaddr(s):
    text = s.strip()

    # peel off an optional radix prefix, decimal otherwise
    radix = 10
    for prefix, radix_ in (('0x', 16), ('0o', 8), ('0b', 2)):
        if text.startswith((prefix, prefix.upper())):
            text = text[2:]
            radix = radix_
            break

    # an optional .trunk suffix applies to every block
    body, dot, suffix = text.partition('.')
    trunk = int(suffix, radix) if dot else None

    # an optional {a,b,...} list names several blocks, anything after the
    # closing brace is ignored
    if body.startswith('{') and '}' in body:
        parts = body[1:body.index('}')].split(',')
    else:
        parts = [body]

    if trunk is None:
        return [int(part, radix) for part in parts]

    return [(int(part, radix), trunk) for part in parts]

# bitwise crc32c (reflected polynomial 0x82f63b78) over data
#
# crc is the result of a previous call, so a checksum can be continued
# across several pieces of data
def crc32c(data, crc=0):
    state = crc ^ 0xffffffff
    for byte in data:
        state ^= byte
        # one shift per bit, xoring in the polynomial whenever a set bit
        # falls off the low end
        for _ in range(8):
            if state & 1:
                state = (state >> 1) ^ 0x82f63b78
            else:
                state >>= 1
    return state ^ 0xffffffff

# population count, the number of '1' digits in bin(x)
def popc(x):
    return sum(digit == '1' for digit in bin(x))

# parity of x, 1 if popc(x) is odd, 0 if it is even
def parity(x):
    return popc(x) % 2

# decode a little-endian 32-bit word from the start of data
#
# data shorter than 4 bytes is zero-padded, anything past the first
# 4 bytes is ignored
def fromle32(data):
    word = data[0:4].ljust(4, b'\0')
    return int.from_bytes(word, 'little')

# decode a leb128 word from the start of data
#
# returns (word, number of bytes consumed), with the word truncated to
# 32 bits, if data runs out before a byte without the continuation bit
# (0x80) shows up, whatever was accumulated is returned along with
# len(data)
def fromleb128(data):
    value = 0
    shift = 0
    consumed = 0
    for byte in data:
        consumed += 1
        # low 7 bits are payload, least-significant group first
        value = (value | ((byte & 0x7f) << shift)) & 0xffffffff
        shift += 7
        # a clear high bit terminates the word
        if not byte & 0x80:
            break
    return value, consumed

# decode a tag from the start of data
#
# a tag is a 16-bit big-endian word followed by two leb128 words, the
# weight and the size, data is zero-padded to at least 4 bytes first
#
# returns (v, tag, weight, size, number of bytes consumed), where v is the
# top bit of the 16-bit word and tag is the remaining 15 bits
def fromtag(data):
    buf = data.ljust(4, b'\0')
    word = int.from_bytes(buf[0:2], 'big')
    weight, wlen = fromleb128(buf[2:])
    size, slen = fromleb128(buf[2+wlen:])
    valid = word >> 15
    return valid, word & 0x7fff, weight, size, 2 + wlen + slen

# hexdump generator
#
# yields one line per width bytes of data, with the bytes in hex on the
# left and as text on the right, bytes outside of ' '..'~' show up as '.'
# in the text column
def xxd(data, width=16):
    for start in range(0, len(data), width):
        row = data[start:start+width]
        hexes = ' '.join('%02x' % byte for byte in row)
        text = ''.join(
                chr(byte) if 0x20 <= byte <= 0x7e else '.'
                    for byte in row)
        # pad both columns so short rows still line up
        yield hexes.ljust(3*width) + ' ' + text.ljust(width)

# TODO sync across scripts
# render a tag, and optionally its weight/size, as a human-readable string
#
# most tags render as [shrub]<name>[ w<weight>][ <size>], where the weight
# is only shown when non-zero
#
# for alt tags size is the jump, rendered as an absolute offset (off-size,
# wrapped to 32 bits) when off is provided, or as a relative -size
# otherwise
def tagrepr(tag, weight=None, size=None, off=None):
    # fragments shared by most tag types, kept lazy so each is only
    # formatted by the branches that actually use it
    def shrub_():
        return 'shrub' if tag & TAG_SHRUB else ''

    def weight_():
        return ' w%d' % weight if weight else ''

    def size_():
        return ' %s' % size if size is not None else ''

    def low_():
        return ' 0x%02x' % (tag & 0xff) if tag & 0xff else ''

    # null tags only show a non-zero weight/size
    if (tag & 0x6fff) == TAG_NULL:
        return (shrub_()
                + 'null'
                + weight_()
                + (' %d' % size if size else ''))

    # tag families with named subtypes, unknown subtypes fall back to the
    # family name and the low byte
    families = {
        TAG_CONFIG: ('config', {
            TAG_MAGIC:      'magic',
            TAG_VERSION:    'version',
            TAG_RCOMPAT:    'rcompat',
            TAG_WCOMPAT:    'wcompat',
            TAG_OCOMPAT:    'ocompat',
            TAG_GEOMETRY:   'geometry',
            TAG_NAMELIMIT:  'namelimit',
            TAG_FILELIMIT:  'filelimit',
        }),
        TAG_GDELTA: ('gdelta', {
            TAG_GRMDELTA:   'grmdelta',
        }),
        TAG_NAME: ('name', {
            TAG_NAME:       'name',
            TAG_REG:        'reg',
            TAG_DIR:        'dir',
            TAG_BOOKMARK:   'bookmark',
            TAG_STICKYNOTE: 'stickynote',
        }),
        TAG_STRUCT: ('struct', {
            TAG_DATA:       'data',
            TAG_BLOCK:      'block',
            TAG_BSHRUB:     'bshrub',
            TAG_BTREE:      'btree',
            TAG_MROOT:      'mroot',
            TAG_MDIR:       'mdir',
            TAG_MTREE:      'mtree',
            TAG_DID:        'did',
            TAG_BRANCH:     'branch',
        }),
    }
    family = families.get(tag & 0x6f00)
    if family is not None:
        fallback, names = family
        name = names.get(tag & 0xfff)
        if name is None:
            name = '%s 0x%02x' % (fallback, tag & 0xff)
        return shrub_() + name + weight_() + size_()

    # user/system attrs, bit 0x100 selects system attrs and is folded
    # into the displayed attr id
    if (tag & 0x6e00) == TAG_ATTR:
        kind = 's' if tag & 0x100 else 'u'
        attr = ((tag & 0x100) >> 1) ^ (tag & 0xff)
        return (('%s%sattr 0x%02x' % (shrub_(), kind, attr))
                + weight_()
                + size_())

    # alt pointers, note these show the weight even when it is zero
    if tag & TAG_ALT:
        color = 'r' if tag & TAG_R else 'b'
        direction = 'gt' if tag & TAG_GT else 'le'
        if weight is not None:
            wpart = ' w%d' % weight
        else:
            wpart = ''
        if not size:
            jpart = ''
        elif off is not None:
            jpart = ' 0x%x' % (0xffffffff & (off-size))
        else:
            jpart = ' -%d' % size
        return 'alt%s%s 0x%03x%s%s' % (
                color, direction, tag & 0x0fff, wpart, jpart)

    # checksum-related tags
    kind = tag & 0x7f00
    if kind == TAG_CKSUM:
        # any low bits other than the p bit are shown raw
        if tag & 0xfe:
            detail = ' 0x%02x' % (tag & 0xff)
        elif tag & TAG_P:
            detail = 'p'
        else:
            detail = ''
        return 'cksum' + detail + weight_() + size_()

    for base, name in (
            (TAG_NOTE, 'note'),
            (TAG_ECKSUM, 'ecksum'),
            (TAG_GCKSUMDELTA, 'gcksumdelta')):
        if kind == base:
            return name + low_() + weight_() + size_()

    # unknown tags are shown raw
    return '0x%04x%s%s' % (
            tag,
            ' w%d' % weight if weight is not None else '',
            ' %d' % size if size is not None else '')

# tagged data in an rbyd
#
# besides the tag, weight and data, this remembers the block, toff and off
# it was created with, and behaves like its data for bool/len/indexing/
# iteration
class Rattr:
    def __init__(self, tag, weight, block, toff, off, data):
        # what the attr is
        self.tag = tag
        self.weight = weight
        self.data = data
        # where the attr was found
        self.block = block
        self.toff = toff
        self.off = off

    # the size of an attr is the size of its data
    @property
    def size(self):
        return len(self.data)

    def tagrepr(self):
        return tagrepr(self.tag, self.weight, self.size)

    def __repr__(self):
        return '<%s %s>' % (type(self).__name__, self.tagrepr())

    # everything else forwards to the underlying data
    def __len__(self):
        return len(self.data)

    def __bool__(self):
        return bool(self.data)

    def __iter__(self):
        return iter(self.data)

    def __getitem__(self, key):
        return self.data[key]

# an alt pointer in an rbyd
#
# jump is the distance back from the alt's tag offset (toff) to its
# target, unless a color is given explicitly, the color is taken from the
# tag's TAG_R bit ('r' if set, 'b' otherwise)
class Ralt:
    def __init__(self, tag, weight, block, toff, off, jump, color=None):
        self.tag = tag
        self.weight = weight
        self.block = block
        self.toff = toff
        self.off = off
        self.jump = jump
        self.color = (
                color if color is not None
                    else 'r' if tag & TAG_R
                    else 'b')

    # the offset this alt jumps to
    @property
    def joff(self):
        return self.toff - self.jump

    def tagrepr(self):
        return tagrepr(self.tag, self.weight, self.jump, self.toff)

    def __repr__(self):
        return '<%s %s>' % (type(self).__name__, self.tagrepr())

# tree branches are an abstract thing for tree rendering
#
# a branch connects a and b at a given depth and with a given color
class TreeBranch:
    def __init__(self, a, b, depth, color):
        # note a and b are context specific
        self.a, self.b = a, b
        self.depth = depth
        self.color = color

    def __repr__(self):
        fields = (self.a, self.b, self.depth, self.color)
        return '%s(%s)' % (
                type(self).__name__,
                ', '.join(str(field) for field in fields))

# our core rbyd type
#
# an Rbyd wraps the raw data of a block along with the state found by
# fetch: the trunk offset, total weight, revision count, end-of-commits
# offset, and checksum
#
# an Rbyd is truthy unless it is marked corrupt, and two Rbyds compare
# equal when they have the same blocks and trunk
class Rbyd:
    # blocks may be either a single block address or an iterable of block
    # addresses, the first of which is the block this rbyd was read from
    def __init__(self, data, blocks, trunk, weight, rev, eoff, cksum, *,
            gcksumdelta=None,
            corrupt=False):
        if isinstance(blocks, int):
            blocks = [blocks]

        self.data = data
        self.blocks = list(blocks)
        self.trunk = trunk
        self.weight = weight
        self.rev = rev
        self.eoff = eoff
        self.cksum = cksum
        self.gcksumdelta = gcksumdelta
        self.corrupt = corrupt

    # the block this rbyd was read from
    @property
    def block(self):
        return self.blocks[0]

    # render the address as block.trunk, or {block,block,...}.trunk when
    # there are multiple blocks, the same syntax rbydaddr parses
    def addr(self):
        if len(self.blocks) == 1:
            return '0x%x.%x' % (self.block, self.trunk)
        else:
            return '0x{%s}.%x' % (
                    ','.join('%x' % block for block in self.blocks),
                    self.trunk)

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__, self.addr())

    def __bool__(self):
        return not self.corrupt

    def __eq__(self, other):
        return (self.blocks, self.trunk) == (other.blocks, other.trunk)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # blocks is a list, and lists are unhashable, so hash an
        # equivalent tuple instead, this keeps equal rbyds hashing equal
        return hash((tuple(self.blocks), self.trunk))

    # fetch an rbyd from disk/data
    #
    # data is either the raw block data or a (file, block_size, ...)
    # tuple to read the block from
    #
    # block is either a block address, a (block, trunk) tuple, or a list
    # of either, in which case every block is fetched and the most recent
    # non-corrupt one is returned
    #
    # trunk, if provided, requests a specific trunk offset
    #
    # cksum, if provided, is the expected checksum, on mismatch the
    # returned rbyd is marked corrupt
    @classmethod
    def fetch(cls, data, block, trunk=None, cksum=None):
        # multiple blocks?
        if isinstance(block, list):
            # fetch all blocks
            rbyds = [cls.fetch(data, block, trunk, cksum) for block in block]
            # determine most recent revision
            #
            # NOTE(review): when the revs are equal, their difference is
            # zero and the second clause is already true, so the trunk
            # tie-break in the third clause never changes the outcome,
            # confirm whether this is intended
            i = 0
            for i_, rbyd in enumerate(rbyds):
                # compare with sequence arithmetic
                if rbyd and (
                        not rbyds[i]
                            or not ((rbyd.rev - rbyds[i].rev) & 0x80000000)
                            or (rbyd.rev == rbyds[i].rev
                                and rbyd.trunk > rbyds[i].trunk)):
                    i = i_
            # keep track of the other blocks
            rbyd = rbyds[i]
            rbyd.blocks += tuple(
                    rbyds[(i+1+j) % len(rbyds)].block
                        for j in range(len(rbyds)-1))
            return rbyd

        # block may encode a trunk
        block, trunk = (
                block[0] if isinstance(block, tuple)
                    else block,
                trunk if trunk is not None
                    else block[1] if isinstance(block, tuple)
                    else None)

        # data can be either disk + block_size tuple or data
        #
        # note preread data can be useful for avoiding race conditions
        # with cksums and shrubs
        if isinstance(data, tuple):
            f, block_size, *_ = data
            # seek to the block
            f.seek(block * block_size)
            data = f.read(block_size)

        # fetch the rbyd
        #
        # the block starts with a 32-bit revision count, followed by tags
        #
        # names with more trailing underscores hold more tentative state,
        # which is copied into the less tentative names at the end of a
        # trunk and when a cksum tag checks out
        rev = fromle32(data[0:4])
        cksum_ = 0
        cksum__ = crc32c(data[0:4])
        cksum___ = cksum__
        perturb = False
        eoff = 0
        eoff_ = None
        j_ = 4
        trunk_ = 0
        trunk__ = 0
        trunk___ = 0
        weight = 0
        weight_ = 0
        weight__ = 0
        gcksumdelta = None
        gcksumdelta_ = None
        # scan tags until we run out of data, or, if a trunk was
        # requested, until we have committed past it
        while j_ < len(data) and (not trunk or eoff <= trunk):
            # read next tag
            v, tag, w, size, d = fromtag(data[j_:])
            # the tag's valid bit must match the parity of the running
            # cksum, otherwise we've hit the end of valid tags
            if v != parity(cksum___):
                break
            cksum___ ^= 0x00000080 if v else 0
            cksum___ = crc32c(data[j_:j_+d], cksum___)
            j_ += d
            # alts carry a jump instead of data, everything else must fit
            # in the block
            if not tag & TAG_ALT and j_ + size > len(data):
                break

            # take care of cksums
            if not tag & TAG_ALT:
                if (tag & 0xff00) != TAG_CKSUM:
                    cksum___ = crc32c(data[j_:j_+size], cksum___)

                    # found a gcksumdelta?
                    #
                    # NOTE(review): this passes the tag's encoded length
                    # (d) as the Rattr's off, while lookupnext passes the
                    # data's offset, confirm which is intended
                    if (tag & 0xff00) == TAG_GCKSUMDELTA:
                        gcksumdelta_ = Rattr(tag, w,
                                block, j_-d, d, data[j_:j_+size])

                # found a cksum?
                else:
                    # check cksum
                    cksum____ = fromle32(data[j_:j_+4])
                    if cksum___ != cksum____:
                        break
                    # commit what we have
                    eoff = eoff_ if eoff_ else j_ + size
                    cksum_ = cksum__
                    trunk_ = trunk__
                    weight = weight_
                    gcksumdelta = gcksumdelta_
                    gcksumdelta_ = None
                    # update perturb bit
                    perturb = tag & TAG_P
                    # revert to data cksum and perturb
                    cksum___ = cksum__ ^ (0xfca42daf if perturb else 0)

            # evaluate trunks
            if (tag & 0xf000) != TAG_CKSUM:
                if not (trunk and j_-d > trunk and not trunk___):
                    # new trunk?
                    if not trunk___:
                        trunk___ = j_-d
                        weight__ = 0

                    # keep track of weight
                    weight__ += w

                    # end of trunk?
                    if not tag & TAG_ALT:
                        # update trunk/weight unless we found a shrub or an
                        # explicit trunk (which may be a shrub) is requested
                        if not tag & TAG_SHRUB or trunk___ == trunk:
                            trunk__ = trunk___
                            weight_ = weight__
                            # keep track of eoff for best matching trunk
                            if trunk and j_ + size > trunk:
                                eoff_ = j_ + size
                                eoff = eoff_
                                cksum_ = cksum___ ^ (
                                        0xfca42daf if perturb else 0)
                                trunk_ = trunk__
                                weight = weight_
                                gcksumdelta = gcksumdelta_
                        trunk___ = 0

                # update canonical checksum, xoring out any perturb state
                cksum__ = cksum___ ^ (0xfca42daf if perturb else 0)

            # skip over any data
            if not tag & TAG_ALT:
                j_ += size

        # cksum mismatch?
        if cksum is not None and cksum_ != cksum:
            return cls(data, block, trunk or 0, 0, rev, 0, cksum_,
                    corrupt=True)

        # no trunk found means there is nothing usable in this block
        return cls(data, block, trunk_, weight, rev, eoff, cksum_,
                gcksumdelta=gcksumdelta,
                corrupt=not trunk_)

    # find the smallest (rid, tag) >= the requested (rid, tag)
    #
    # returns (rid, tag, rattr), or (None, None, None) if there is no such
    # tag or the rbyd is corrupt, if path is set the result also includes
    # a list of (ralt, followed) pairs for each alt encountered during the
    # search
    def lookupnext(self, rid, tag=None, *,
            path=False):
        if not self:
            return None, None, None, *(([],) if path else ())

        # tag 0 is never looked up, searches start at tag 1
        tag = max(tag or 0, 0x1)
        # lower/upper bound the rids still reachable from here
        lower = 0
        upper = self.weight
        path_ = []

        # descend down tree
        j = self.trunk
        while True:
            _, alt, w, jump, d = fromtag(self.data[j:])

            # found an alt?
            if alt & TAG_ALT:
                # follow?
                if ((rid, tag & 0xfff) > (upper-w-1, alt & 0xfff)
                        if alt & TAG_GT
                        else ((rid, tag & 0xfff)
                            <= (lower+w-1, alt & 0xfff))):
                    lower += upper-lower-w if alt & TAG_GT else 0
                    upper -= upper-lower-w if not alt & TAG_GT else 0
                    j = j - jump

                    if path:
                        # figure out which color
                        #
                        # a red alt followed by another red alt is
                        # colored 'y'
                        if alt & TAG_R:
                            _, nalt, _, _, _ = fromtag(self.data[j+jump+d:])
                            if nalt & TAG_R:
                                color = 'y'
                            else:
                                color = 'r'
                        else:
                            color = 'b'

                        path_.append((
                                Ralt(alt, w, self.block, j+jump, j+jump+d,
                                    jump, color),
                                True))

                # stay on path
                else:
                    lower += w if not alt & TAG_GT else 0
                    upper -= w if alt & TAG_GT else 0
                    j = j + d

                    if path:
                        # figure out which color
                        if alt & TAG_R:
                            _, nalt, _, _, _ = fromtag(self.data[j:])
                            if nalt & TAG_R:
                                color = 'y'
                            else:
                                color = 'r'
                        else:
                            color = 'b'

                        path_.append((
                                Ralt(alt, w, self.block, j-d, j,
                                    jump, color),
                                False))

            # found tag
            else:
                rid_ = upper-1
                tag_ = alt
                w_ = upper-lower

                # a null tag, or a tag before what was requested, means
                # there is nothing left to find
                if not tag_ or (rid_, tag_) < (rid, tag):
                    return None, None, None, *(([],) if path else ())

                return (rid_, tag_,
                        Rattr(tag_, w_, self.block, j, j+d,
                            self.data[j+d:j+d+jump]),
                        *((path_,) if path else ()))

    # lookup a specific (rid, tag), ignoring any tag bits set in mask
    #
    # the shape of the result depends on the arguments:
    # - with a mask (or no tag at all) => tag, rattr
    # - otherwise                      => rattr
    # with path set, the path from lookupnext is appended, not-found
    # results have the same shape with None in place of tag/rattr
    def lookup(self, rid, tag=None, mask=None, *,
            path=False):
        # no tag? match any tag
        if tag is None:
            tag, mask = 0, 0xffff

        rid_, tag_, rattr_, *path_ = self.lookupnext(rid, tag & ~(mask or 0),
                path=path)
        if (rid_ is None
                or rid_ != rid
                or (tag_ & ~(mask or 0)) != (tag & ~(mask or 0))):
            if mask is not None:
                return None, None, *path_
            elif path:
                return None, *path_
            else:
                return None

        if mask is not None:
            return tag_, rattr_, *path_
        elif path:
            return rattr_, *path_
        else:
            return rattr_

    # rbyd[rid], rbyd[rid, tag], and rbyd[rid, tag, mask] map to lookup
    def __getitem__(self, key):
        if not isinstance(key, tuple):
            key = (key,)

        return self.lookup(*key)

    def __contains__(self, key):
        if not isinstance(key, tuple):
            key = (key,)

        v = self.lookup(*key)
        # lookup may return either a tuple or a lone rattr
        if isinstance(v, tuple):
            return v[0] is not None
        else:
            return v is not None

    # iterate over every (rid, tag, rattr) in the rbyd, in order
    def __iter__(self):
        rid, tag = -1, 0
        while True:
            rid, tag, rattr = self.lookupnext(rid, tag+0x1)
            # found end of tree?
            if rid is None:
                break

            yield rid, tag, rattr

    # lookup by name
    #
    # returns (found, rid, tag, rattr), if no exact match is found this is
    # False along with the last entry found to be smaller than (did, name),
    # or Nones if there is no such entry
    def namelookup(self, did, name):
        # binary search
        best = (False, None, None, None)
        lower = 0
        upper = self.weight
        while lower < upper:
            rid, tag, rattr = self.lookupnext(lower + (upper-1-lower)//2)
            if rid is None:
                break

            # treat vestigial names as a catch-all
            if ((tag == TAG_NAME and rid-(rattr.weight-1) == 0)
                    or (tag & 0xff00) != TAG_NAME):
                did_ = 0
                name_ = b''
            else:
                # names are a leb128 did followed by the name itself
                did_, d = fromleb128(rattr[:])
                name_ = rattr[d:]

            # bisect search space
            if (did_, name_) > (did, name):
                upper = rid-(rattr.weight-1)
            elif (did_, name_) < (did, name):
                lower = rid + 1
                # keep track of best match
                best = (False, rid, tag, rattr)
            else:
                # found a match
                return True, rid, tag, rattr

        return best

    # create tree representation for debugging
    #
    # returns a set of TreeBranches, where a and b are (rid, tag) pairs
    #
    # by default alts with only one reachable edge are pruned, if rbyd is
    # set these are kept and rendered as converging paths
    def tree(self, *,
            rbyd=False):
        # tag offset => (rid, tag) of the tag at that offset
        trunks = co.defaultdict(lambda: (-1, 0))
        # alt offset => info about the alt
        # - f/nf   => offsets reached when followed/not followed
        # - c      => color
        # - ft/nft => trunks reached when followed/not followed
        # - h      => height
        alts = co.defaultdict(lambda: {})

        # lookup every tag, keeping track of the alts we see on the way
        rid, tag = -1, 0
        while True:
            rid, tag, rattr, path = self.lookupnext(rid, tag+0x1,
                    path=True)
            # found end of tree?
            if rid is None:
                break

            # keep track of trunks/alts
            trunks[rattr.toff] = (rid, tag)

            for ralt, followed in path:
                if followed:
                    alts[ralt.toff] |= {'f': ralt.joff, 'c': ralt.color}
                else:
                    alts[ralt.toff] |= {'nf': ralt.off, 'c': ralt.color}

        if rbyd:
            # treat unreachable alts as converging paths
            for j_, alt in alts.items():
                if 'f' not in alt:
                    alt['f'] = alt['nf']
                elif 'nf' not in alt:
                    alt['nf'] = alt['f']

        else:
            # prune any alts with unreachable edges
            pruned = {}
            for j, alt in alts.items():
                if 'f' not in alt:
                    pruned[j] = alt['nf']
                elif 'nf' not in alt:
                    pruned[j] = alt['f']
            for j in pruned.keys():
                del alts[j]

            # redirect any edges that pointed at pruned alts
            for j, alt in alts.items():
                while alt['f'] in pruned:
                    alt['f'] = pruned[alt['f']]
                while alt['nf'] in pruned:
                    alt['nf'] = pruned[alt['nf']]

        # find the trunk and depth of each alt
        def rec_trunk(j):
            if j not in alts:
                return trunks[j]
            else:
                if 'nft' not in alts[j]:
                    alts[j]['nft'] = rec_trunk(alts[j]['nf'])
                return alts[j]['nft']

        for j in alts.keys():
            rec_trunk(j)
        for j, alt in alts.items():
            if alt['f'] in alts:
                alt['ft'] = alts[alt['f']]['nft']
            else:
                alt['ft'] = trunks[alt['f']]

        def rec_height(j):
            if j not in alts:
                return 0
            else:
                if 'h' not in alts[j]:
                    alts[j]['h'] = max(
                            rec_height(alts[j]['f']),
                            rec_height(alts[j]['nf'])) + 1
                return alts[j]['h']

        for j in alts.keys():
            rec_height(j)

        t_depth = max((alt['h']+1 for alt in alts.values()), default=0)

        # convert to more general tree representation
        tree = set()
        for j, alt in alts.items():
            # note all non-trunk edges should be colored black
            tree.add(TreeBranch(
                    alt['nft'],
                    alt['nft'],
                    depth=t_depth-1 - alt['h'],
                    color=alt['c']))
            if alt['ft'] != alt['nft']:
                tree.add(TreeBranch(
                        alt['nft'],
                        alt['ft'],
                        depth=t_depth-1 - alt['h'],
                        color='b'))

        return tree


def dbg_log(rbyd, *,
        block_size,
        color=False,
        **args):
    """Print every tag of an rbyd in the order it appears in the log.

    The log is walked starting at offset 4 (the first 4 bytes are printed
    as the revision count when 'raw' is set, and seed the running cksum).
    Scanning stops at rbyd.eoff, or at block_size when 'all' is set.

    Args:
        rbyd: Fetched rbyd, this function reads rbyd.data and rbyd.eoff.
        block_size: Upper bound of the scan when args['all'] is truthy.
        color: Emit ANSI color escapes when truthy.
        **args: Optional flags read via args.get():
            'all'         - scan up to block_size instead of rbyd.eoff
            'jumps'       - draw alt pointer jumps in the right margin
            'lifetimes'   - draw rid inserts/deletes in the left margin
            'raw'         - also hexdump the tag encodings and the data
            'no_truncate' - hexdump the full data instead of a preview

    Returns:
        None, all output is written with print().
    """
    data = rbyd.data

    # preprocess jumps
    if args.get('jumps'):
        # each jump is (alt's offset, target offset, x-offset, color), the
        # x-offset starts at 0 and is resolved in the next pass
        jumps = []
        j_ = 4
        while j_ < (block_size if args.get('all') else rbyd.eoff):
            j = j_
            v, tag, w, size, d = fromtag(data[j_:])
            j_ += d
            # only non-alt tags are followed by size bytes of data
            if not tag & TAG_ALT:
                j_ += size

            # for alts, size is the distance back to the jump target
            if tag & TAG_ALT and size:
                # figure out which alt color
                if tag & TAG_R:
                    # peek at the following tag, two reds in a row are
                    # drawn as 'y'
                    _, ntag, _, _, _ = fromtag(data[j_:])
                    if ntag & TAG_R:
                        jumps.append((j, j-size, 0, 'y'))
                    else:
                        jumps.append((j, j-size, 0, 'r'))
                else:
                    jumps.append((j, j-size, 0, 'b'))

        # figure out x-offsets to avoid collisions between jumps
        for j in range(len(jumps)):
            a, b, _, c = jumps[j]
            x = 0
            # bump x until no earlier jump with an overlapping offset
            # range already sits in the same column
            while any(
                    max(a, b) >= min(a_, b_)
                            and max(a_, b_) >= min(a, b)
                            and x == x_
                        for a_, b_, x_, _ in jumps[:j]):
                x += 1
            jumps[j] = a, b, x, c

        def jumprepr(j):
            # render jumps
            #
            # builds the margin for the tag at offset j as a sparse map of
            # column -> character, later jumps overwrite earlier ones
            chars = {}
            for a, b, x, c in jumps:
                c_start = (
                    '\x1b[33m' if color and c == 'y'
                        else '\x1b[31m' if color and c == 'r'
                        else '\x1b[90m' if color
                        else '')
                c_stop = '\x1b[m' if color else ''

                if j == a:
                    # this row is the alt itself, the jump starts here
                    for x_ in range(2*x+1):
                        chars[x_] = '%s-%s' % (c_start, c_stop)
                    chars[2*x+1] = '%s\'%s' % (c_start, c_stop)
                elif j == b:
                    # this row is the jump's target, mark it with an arrow
                    for x_ in range(2*x+1):
                        chars[x_] = '%s-%s' % (c_start, c_stop)
                    chars[2*x+1] = '%s.%s' % (c_start, c_stop)
                    chars[0] = '%s<%s' % (c_start, c_stop)
                elif j >= min(a, b) and j <= max(a, b):
                    # this row lies between the two ends of the jump
                    chars[2*x+1] = '%s|%s' % (c_start, c_stop)

            return ''.join(chars.get(x, ' ')
                    for x in range(max(chars.keys(), default=0)+1))

    # preprocess lifetimes
    lifetime_width = 0
    if args.get('lifetimes'):
        class Lifetime:
            # next index into COLORS, shared by all lifetimes so colors
            # are handed out round-robin
            color_i = 0
            def __init__(self, j):
                self.origin = j
                self.tags = set()
                self.color = COLORS[self.__class__.color_i]
                self.__class__.color_i = (
                        self.__class__.color_i + 1) % len(COLORS)

            def add(self, j):
                self.tags.add(j)

            # a lifetime is falsy until at least one tag is attached
            def __bool__(self):
                return bool(self.tags)


        # first figure out where each rid comes from
        #
        # weights and lifetimes are parallel lists, lifetimes[i] covers
        # weights[i] consecutive rids
        weights = []
        lifetimes = []
        def index(weights, rid):
            # find the entry containing rid, returns (entry index, offset
            # into that entry), or (len(weights), 0) if rid is past the end
            for i, w in enumerate(weights):
                if rid < w:
                    return i, rid
                rid -= w
            return len(weights), 0

        # checkpoint_js is kept sorted by offset so lifetimerepr can
        # bisect it, checkpoints[x] is the state recorded at checkpoint_js[x]
        checkpoint_js = [0]
        checkpoints = [([], [], set(), set(), set())]
        def checkpoint(j, weights, lifetimes, grows, shrinks, tags):
            checkpoint_js.append(j)
            checkpoints.append((
                    weights.copy(), lifetimes.copy(),
                    grows, shrinks, tags))

        lower_, upper_ = 0, 0
        weight_ = 0
        trunk_ = 0
        j_ = 4
        while j_ < (block_size if args.get('all') else rbyd.eoff):
            j = j_
            v, tag, w, size, d = fromtag(data[j_:])
            j_ += d
            if not tag & TAG_ALT:
                j_ += size

            # evaluate trunks
            if (tag & 0xf000) != TAG_CKSUM:
                # first tag of a new trunk resets the accumulated bounds
                if not trunk_:
                    trunk_ = j_-d
                    lower_, upper_ = 0, 0

                # alts without TAG_GT add to the lower bound, everything
                # else (including the terminating tag) to the upper bound
                if tag & TAG_ALT and not tag & TAG_GT:
                    lower_ += w
                else:
                    upper_ += w

                if not tag & TAG_ALT:
                    # derive the current tag's rid from alt weights
                    delta = (lower_+upper_) - weight_
                    weight_ = lower_+upper_
                    rid = lower_ + w-1
                    trunk_ = 0

            if (tag & 0xf000) != TAG_CKSUM and not tag & TAG_ALT:
                # note we ignore out-of-bounds here for debugging
                if delta > 0:
                    # grow lifetimes
                    i, rid_ = index(weights, lower_)
                    if rid_ > 0:
                        # inserting in the middle of an entry splits it in
                        # two around the new lifetime
                        weights[i:i+1] = [rid_, delta, weights[i]-rid_]
                        lifetimes[i:i+1] = [
                                lifetimes[i], Lifetime(j), lifetimes[i]]
                    else:
                        weights[i:i] = [delta]
                        lifetimes[i:i] = [Lifetime(j)]

                    checkpoint(j, weights, lifetimes, {i}, set(), {i})

                elif delta < 0:
                    # shrink lifetimes
                    #
                    # work on copies so the checkpoint below still records
                    # the state before the shrink
                    i, rid_ = index(weights, lower_)
                    delta_ = -delta
                    weights_ = weights.copy()
                    lifetimes_ = lifetimes.copy()
                    shrinks = set()
                    while delta_ > 0 and i < len(weights_):
                        if weights_[i] > delta_:
                            # entry survives, only trim its weight
                            delta__ = min(delta_, weights_[i]-rid_)
                            delta_ -= delta__
                            weights_[i] -= delta__
                            i += 1
                            rid_ = 0
                        else:
                            # entry is removed entirely, record its index
                            # in terms of the pre-shrink lists
                            delta_ -= weights_[i]
                            weights_[i:i+1] = []
                            lifetimes_[i:i+1] = []
                            shrinks.add(i + len(shrinks))

                    checkpoint(j, weights, lifetimes, set(), shrinks, {i})
                    weights = weights_
                    lifetimes = lifetimes_

                if rid >= 0:
                    # attach tag to lifetime
                    i, rid_ = index(weights, rid)
                    if i < len(weights):
                        lifetimes[i].add(j)

                    # grows/shrinks already recorded their own checkpoint
                    if delta == 0:
                        checkpoint(j, weights, lifetimes, set(), set(), {i})

        # two columns per lifetime, sized for the checkpoint with the most
        # non-empty lifetimes
        lifetime_width = 2*max((
                sum(1 for lifetime in lifetimes if lifetime)
                    for _, lifetimes, _, _, _ in checkpoints),
                default=0)

        def lifetimerepr(j):
            # find the most recent checkpoint at or before offset j
            x = bisect.bisect(checkpoint_js, j)-1
            j_ = checkpoint_js[x]
            weights, lifetimes, grows, shrinks, tags = checkpoints[x]

            reprs = []
            colors = []
            was = None
            for i, (w, lifetime) in enumerate(zip(weights, lifetimes)):
                # skip lifetimes with no tags and shrinks
                if not lifetime or (j != j_ and i in shrinks):
                    # carry the marker over to the next entry, copying
                    # first so the stored checkpoint is not mutated
                    if i in grows or i in shrinks or i in tags:
                        tags = tags.copy()
                        tags.add(i+1)
                    continue

                if j == j_ and i in grows:
                    reprs.append('.')
                    was = 'grow'
                elif j == j_ and i in shrinks:
                    reprs.append('\'')
                    was = 'shrink'
                elif j == j_ and i in tags:
                    reprs.append('* ')
                elif was == 'grow':
                    reprs.append('\\ ')
                elif was == 'shrink':
                    reprs.append('/ ')
                else:
                    reprs.append('| ')

                colors.append(lifetime.color)

            # pad based on the visible characters only, the color escapes
            # must not count towards the column width
            return '%s%*s' % (
                    ''.join('%s%s%s' % (
                            '\x1b[%sm' % c if color else '',
                            r,
                            '\x1b[m' if color else '')
                        for r, c in zip(reprs, colors)),
                    lifetime_width - sum(len(r) for r in reprs), '')


    # dynamically size the id field
    #
    # we need to do an additional pass to find this since our rbyd weight
    # does not include any shrub trees
    weight_ = 0
    weight__ = 0
    trunk_ = 0
    j_ = 4
    while j_ < (block_size if args.get('all') else rbyd.eoff):
        j = j_
        v, tag, w, size, d = fromtag(data[j_:])
        j_ += d

        if not tag & TAG_ALT:
            j_ += size

        # evaluate trunks
        if (tag & 0xf000) != TAG_CKSUM:
            if not trunk_:
                trunk_ = j_-d
                weight__ = 0

            # sum every weight in the trunk, alts included
            weight__ += w

            if not tag & TAG_ALT:
                # found new weight?
                weight_ = max(weight_, weight__)
                trunk_ = 0

    # number of decimal digits needed for the largest weight seen
    w_width = mt.ceil(mt.log10(max(1, weight_)+1))

    # print revision count
    if args.get('raw'):
        print('%8s: %*s%*s %s' % (
                '%04x' % 0,
                lifetime_width, '',
                2*w_width+1, '',
                next(xxd(data[0:4]))))

    # print tags
    #
    # cksum is the checksum as of the last non-cksum tag, cksum_ is the
    # running checksum including any perturb state
    cksum = crc32c(data[0:4])
    cksum_ = cksum
    perturb = False
    lower_, upper_ = 0, 0
    trunk_ = 0
    j_ = 4
    while j_ < (block_size if args.get('all') else rbyd.eoff):
        # notes collect any mismatches found for this tag
        notes = []

        # read next tag
        j = j_
        v, tag, w, size, d = fromtag(data[j_:])
        # the tag's v bit is compared against the parity of the running
        # cksum
        if v != parity(cksum_):
            notes.append('v!=%x' % parity(cksum_))
        cksum_ ^= 0x00000080 if v else 0
        cksum_ = crc32c(data[j_:j_+d], cksum_)
        j_ += d

        # take care of cksums
        if not tag & TAG_ALT:
            if (tag & 0xff00) != TAG_CKSUM:
                cksum_ = crc32c(data[j_:j_+size], cksum_)
            # found a cksum?
            else:
                # check cksum
                cksum__ = fromle32(data[j_:j_+4])
                if cksum_ != cksum__:
                    notes.append('cksum!=%08x' % cksum__)
                # update perturb bit
                perturb = tag & TAG_P
                # revert to data cksum and perturb
                cksum_ = cksum ^ (0xfca42daf if perturb else 0)
            j_ += size

        # evaluate trunks
        if (tag & 0xf000) != TAG_CKSUM:
            if not trunk_:
                trunk_ = j_-d
                lower_, upper_ = 0, 0

            if tag & TAG_ALT and not tag & TAG_GT:
                lower_ += w
            else:
                upper_ += w

            # end of trunk?
            if not tag & TAG_ALT:
                # derive the current tag's rid from alt weights
                rid = lower_ + w-1
                trunk_ = 0

            # update canonical checksum, xoring out any perturb state
            cksum = cksum_ ^ (0xfca42daf if perturb else 0)

        # show human-readable tag representation
        #
        # columns are: offset, lifetimes, rid or rid range, tag repr with
        # a data preview, notes, and jumps. anything at or past rbyd.eoff
        # is greyed out when color is enabled
        print('%s%08x:%s %*s%s%*s %-*s%s%s%s' % (
                '\x1b[90m' if color and j >= rbyd.eoff else '',
                j,
                '\x1b[m' if color and j >= rbyd.eoff else '',
                lifetime_width, lifetimerepr(j)
                    if args.get('lifetimes')
                    else '',
                '\x1b[90m' if color and j >= rbyd.eoff else '',
                2*w_width+1, '' if (tag & 0xe000) != 0x0000
                    else '%d-%d' % (rid-(w-1), rid) if w > 1
                    else rid,
                56+w_width, '%-*s  %s' % (
                    21+w_width, tagrepr(tag, w, size, j),
                    next(xxd(data[j+d:j+d+min(size, 8)], 8), '')
                        if not args.get('raw')
                            and not args.get('no_truncate')
                            and not tag & TAG_ALT
                        else ''),
                ' (%s)' % ', '.join(notes) if notes else '',
                '\x1b[m' if color and j >= rbyd.eoff else '',
                ' %s' % jumprepr(j)
                    if args.get('jumps') and not notes
                    else ''))

        # show on-disk encoding of tags
        if args.get('raw'):
            for o, line in enumerate(xxd(data[j:j+d])):
                print('%s%8s: %*s%*s %s%s' % (
                        '\x1b[90m' if color and j >= rbyd.eoff else '',
                        '%04x' % (j + o*16),
                        lifetime_width, '',
                        2*w_width+1, '',
                        line,
                        '\x1b[m' if color and j >= rbyd.eoff else ''))
        # show the tag's full data, alts carry no data
        if args.get('raw') or args.get('no_truncate'):
            if not tag & TAG_ALT:
                for o, line in enumerate(xxd(data[j+d:j+d+size])):
                    print('%s%8s: %*s%*s %s%s' % (
                            '\x1b[90m' if color and j >= rbyd.eoff else '',
                            '%04x' % (j+d + o*16),
                            lifetime_width, '',
                            2*w_width+1, '',
                            line,
                            '\x1b[m' if color and j >= rbyd.eoff else ''))


def dbg_tree(rbyd, *,
        block_size,
        color=False,
        **args):
    """Print the attrs of an rbyd in iteration order, one line per tag.

    Args:
        rbyd: Fetched rbyd, nothing is printed if it is falsy. Otherwise
            this reads rbyd.data and rbyd.weight, iterates rbyd for
            (rid, tag, rattr) triples, and calls rbyd.tree() when a tree
            is requested.
        block_size: Accepted for symmetry with dbg_log, not used here.
        color: Emit ANSI color escapes for tree branches when truthy.
        **args: Optional flags read via args.get():
            'tree'        - draw the tree in front of each tag
            'tree_rbyd'   - same, and forwarded to rbyd.tree(rbyd=...)
            'raw'         - also hexdump the tag encodings and the data
            'no_truncate' - hexdump the full data instead of a preview

    Returns:
        None, all output is written with print().
    """
    # nothing to render without an rbyd
    if not rbyd:
        return

    data = rbyd.data
    want_tree = args.get('tree') or args.get('tree_rbyd')

    # precompute tree
    t_width = 0
    if want_tree:
        tree = rbyd.tree(rbyd=args.get('tree_rbyd'))

        # the deepest branch decides how many columns we need
        t_depth = max((branch.depth+1 for branch in tree), default=0)
        if t_depth > 0:
            t_width = 2*t_depth + 2

        # escape sequence that starts each branch color
        ansi = {
            'y': '\x1b[33m',
            'r': '\x1b[31m',
            'b': '\x1b[90m',
        }

        def treerepr(rid, tag):
            if t_depth == 0:
                return ''

            x = (rid, tag)

            def crosses(branch, d):
                # does this branch pass strictly through x at depth d?
                return (branch.depth == d
                        and x > min(branch.a, branch.b)
                        and x < max(branch.a, branch.b))

            def branchrepr(d, was):
                # a branch ending at x takes priority
                for b in tree:
                    if not (b.depth == d and b.b == x):
                        continue
                    if any(o.depth == d and o.a == x for o in tree):
                        glyph = '+-'
                    elif any(crosses(o, d) for o in tree):
                        glyph = '|-'
                    elif b.a < b.b:
                        glyph = '\'-'
                    else:
                        glyph = '.-'
                    return glyph, b.color, b.color

                # then a branch starting at x
                for b in tree:
                    if b.depth == d and b.a == x:
                        return '+ ', b.color, None

                # then a branch merely passing by
                for b in tree:
                    if crosses(b, d):
                        return '| ', b.color, was

                # otherwise extend any branch coming from the left
                if was:
                    return '--', was, was
                return '  ', None, None

            cells = []
            was = None
            last = t_depth-1
            for d in range(t_depth):
                t, c, was = branchrepr(d, was)

                c_start = ansi.get(c, '') if color else ''
                c_stop = '\x1b[m' if color and c else ''
                # only the last column gets an arrow head
                if d == last:
                    head = '>' if was else ' '
                else:
                    head = ''
                cells.append('%s%s%s%s' % (c_start, t, head, c_stop))

            return '%s ' % ''.join(cells)


    # dynamically size the id field
    w_width = mt.ceil(mt.log10(max(1, rbyd.weight)+1))
    id_width = 2*w_width+1

    def hexdump(off, buf):
        # one line per 16 bytes, indented past the tree and id columns
        for o, line in enumerate(xxd(buf)):
            print('%8s: %*s%*s %s' % (
                    '%04x' % (off + o*16),
                    t_width, '',
                    id_width, '',
                    line))

    for i, (rid, tag, rattr) in enumerate(rbyd):
        trunk = treerepr(rid, tag) if want_tree else ''

        # rid range for weighted attrs, a bare rid otherwise, and blank
        # for weightless attrs past the first
        if rattr.weight > 1:
            id_ = '%d-%d' % (rid-(rattr.weight-1), rid)
        elif rattr.weight > 0 or i == 0:
            id_ = rid
        else:
            id_ = ''

        # the short data preview is dropped whenever a full dump follows
        if args.get('raw') or args.get('no_truncate') or tag & TAG_ALT:
            preview = ''
        else:
            preview = next(xxd(rattr[:8], 8), '')

        # show human-readable tag representation
        print('%08x: %s%*s %-*s  %s' % (
                rattr.toff,
                trunk,
                id_width, id_,
                21+w_width, rattr.tagrepr(),
                preview))

        # show on-disk encoding of tags
        if args.get('raw'):
            hexdump(rattr.toff, data[rattr.toff:rattr.off])
        if ((args.get('raw') or args.get('no_truncate'))
                and not tag & TAG_ALT):
            hexdump(rattr.off, rattr[:])


def main(disk, blocks=None, *,
        block_size=None,
        block_count=None,
        trunk=None,
        color='auto',
        **args):
    """Fetch an rbyd from a disk image and print it.

    Args:
        disk: Path of the file containing the block device.
        blocks: List of block-address tuples, these are flattened into a
            single list of blocks. Defaults to block 0.
        block_size: Block size in bytes, or a (block_size, block_count)
            tuple. If omitted the whole file is treated as one block.
        block_count: Block count, taken from block_size if that is a tuple
            and no explicit count is given.
        trunk: Trunk offset requested on the command line.
        color: 'auto' (color only if stdout is a tty), 'always', anything
            else disables color.
        **args: Remaining flags, forwarded to dbg_log/dbg_tree. 'log'
            selects dbg_log, 'error_on_corrupt' exits with status 2 if the
            fetched rbyd is falsy.

    Returns:
        None. May raise SystemExit(2), see 'error_on_corrupt' above.
    """
    # NOTE(review): trunk and block_count are accepted but never used
    # below, confirm whether Rbyd.fetch is expected to receive them

    # import sys here, at file level it is only imported under the
    # __main__ guard, which breaks main when this file is imported
    import sys

    # figure out what color should be
    if color == 'auto':
        color = sys.stdout.isatty()
    elif color == 'always':
        color = True
    else:
        color = False

    # is bd geometry specified?
    if isinstance(block_size, tuple):
        block_size, block_count_ = block_size
        if block_count is None:
            block_count = block_count_

    # flatten blocks, default to block 0
    if not blocks:
        blocks = [(0,)]
    blocks = [block for blocks_ in blocks for block in blocks_]

    with open(disk, 'rb') as f:
        # if block_size is omitted, assume the block device is one big block
        if block_size is None:
            f.seek(0, os.SEEK_END)
            block_size = f.tell()

        # fetch the rbyd
        rbyd = Rbyd.fetch((f, block_size), blocks)

    print('rbyd %s w%d, rev %08x, size %d, cksum %08x' % (
            rbyd.addr(),
            rbyd.weight,
            rbyd.rev,
            rbyd.eoff,
            rbyd.cksum))

    # show either the raw log or the tree of attrs
    if args.get('log'):
        dbg_log(rbyd,
                block_size=block_size,
                color=color,
                **args)
    else:
        dbg_tree(rbyd,
                block_size=block_size,
                color=color,
                **args)

    if args.get('error_on_corrupt') and not rbyd:
        sys.exit(2)


if __name__ == "__main__":
    import argparse
    import sys

    # build the command-line interface, every parsed option is handed to
    # main as a keyword argument of the same name
    parser = argparse.ArgumentParser(
            description="Debug rbyd metadata.",
            allow_abbrev=False)
    add_argument = parser.add_argument

    # what to read
    add_argument('disk',
            help="File containing the block device.")
    add_argument('blocks', nargs='*', type=rbydaddr,
            help="Block address of metadata blocks.")

    # geometry and trunk selection
    add_argument('-b', '--block-size', type=bdgeom,
            help="Block size/geometry in bytes.")
    add_argument('--block-count', type=lambda x: int(x, 0),
            help="Block count in blocks.")
    add_argument('--trunk', type=lambda x: int(x, 0),
            help="Use this offset as the trunk of the tree.")

    # presentation
    add_argument('--color',
            choices=['never', 'always', 'auto'], default='auto',
            help="When to use terminal colors. Defaults to 'auto'.")
    add_argument('-a', '--all', action='store_true',
            help="Don't stop parsing on bad commits.")
    add_argument('-l', '--log', action='store_true',
            help="Show the raw tags as they appear in the log.")
    add_argument('-r', '--raw', action='store_true',
            help="Show the raw data including tag encodings.")
    add_argument('-T', '--no-truncate', action='store_true',
            help="Don't truncate, show the full contents.")
    add_argument('-t', '--tree', action='store_true',
            help="Show the rbyd tree.")
    # TODO adopt this rename in all scripts
    add_argument('-R', '--tree-rbyd', action='store_true',
            help="Show the full rbyd tree.")
    add_argument('-j', '--jumps', action='store_true',
            help="Show alt pointer jumps in the margin.")
    add_argument('-g', '--lifetimes', action='store_true',
            help="Show inserts/deletes of ids in the margin.")
    add_argument('-e', '--error-on-corrupt', action='store_true',
            help="Error if no valid commit is found.")

    # leave out unset options so main falls back to its own defaults
    parsed = vars(parser.parse_intermixed_args())
    kwargs = {name: value
            for name, value in parsed.items()
            if value is not None}
    sys.exit(main(**kwargs))