littlefs/scripts/dbgbtree.py

#!/usr/bin/env python3

# prevent local imports
if __name__ == "__main__":
    __import__('sys').path.pop(0)

import bisect
import collections as co
import functools as ft
import itertools as it
import math as mt
import os
import struct

try:
    import crc32c as crc32c_lib
except ModuleNotFoundError:
    crc32c_lib = None


TAG_NULL        = 0x0000    ## 0x0000  v--- ---- ---- ----
TAG_CONFIG      = 0x0000    ## 0x00tt  v--- ---- -ttt tttt
TAG_MAGIC       = 0x0031    #  0x003r  v--- ---- --11 --rr
TAG_VERSION     = 0x0034    #  0x0034  v--- ---- --11 -1--
TAG_RCOMPAT     = 0x0035    #  0x0035  v--- ---- --11 -1-1
TAG_WCOMPAT     = 0x0036    #  0x0036  v--- ---- --11 -11-
TAG_OCOMPAT     = 0x0037    #  0x0037  v--- ---- --11 -111
TAG_GEOMETRY    = 0x0038    #  0x0038  v--- ---- --11 1---
TAG_NAMELIMIT   = 0x0039    #  0x0039  v--- ---- --11 1--1
TAG_FILELIMIT   = 0x003a    #  0x003a  v--- ---- --11 1-1-
TAG_GDELTA      = 0x0100    ## 0x01tt  v--- ---1 -ttt ttrr
TAG_GRMDELTA    = 0x0100    #  0x0100  v--- ---1 ---- ----
TAG_NAME        = 0x0200    ## 0x02tt  v--- --1- -ttt tttt
TAG_BNAME       = 0x0200    #  0x0200  v--- --1- ---- ----
TAG_REG         = 0x0201    #  0x0201  v--- --1- ---- ---1
TAG_DIR         = 0x0202    #  0x0202  v--- --1- ---- --1-
TAG_STICKYNOTE  = 0x0203    #  0x0203  v--- --1- ---- --11
TAG_BOOKMARK    = 0x0204    #  0x0204  v--- --1- ---- -1--
TAG_MNAME       = 0x0220    #  0x0220  v--- --1- --1- ----
TAG_STRUCT      = 0x0300    ## 0x03tt  v--- --11 -ttt ttrr
TAG_BRANCH      = 0x0300    #  0x030r  v--- --11 ---- --rr
TAG_DATA        = 0x0304    #  0x0304  v--- --11 ---- -1--
TAG_BLOCK       = 0x0308    #  0x0308  v--- --11 ---- 1err
TAG_DID         = 0x0314    #  0x0314  v--- --11 ---1 -1--
TAG_BSHRUB      = 0x0318    #  0x0318  v--- --11 ---1 1---
TAG_BTREE       = 0x031c    #  0x031c  v--- --11 ---1 11rr
TAG_MROOT       = 0x0321    #  0x032r  v--- --11 --1- --rr
TAG_MDIR        = 0x0325    #  0x0324  v--- --11 --1- -1rr
TAG_MTREE       = 0x032c    #  0x032c  v--- --11 --1- 11rr
TAG_ATTR        = 0x0400    ## 0x04aa  v--- -1-a -aaa aaaa
TAG_UATTR       = 0x0400    #  0x04aa  v--- -1-- -aaa aaaa
TAG_SATTR       = 0x0500    #  0x05aa  v--- -1-1 -aaa aaaa
TAG_SHRUB       = 0x1000    ## 0x1kkk  v--1 kkkk -kkk kkkk
TAG_ALT         = 0x4000    ## 0x4kkk  v1cd kkkk -kkk kkkk
TAG_B           = 0x0000
TAG_R           = 0x2000
TAG_LE          = 0x0000
TAG_GT          = 0x1000
TAG_CKSUM       = 0x3000    ## 0x300p  v-11 ---- ---- -pqq
TAG_PHASE       = 0x0003
TAG_PERTURB     = 0x0004
TAG_NOTE        = 0x3100    ## 0x3100  v-11 ---1 ---- ----
TAG_ECKSUM      = 0x3200    ## 0x3200  v-11 --1- ---- ----
TAG_GCKSUMDELTA = 0x3300    ## 0x3300  v-11 --11 ---- ----


# some ways of block geometry representations
# 512      -> 512
# 512x16   -> (512, 16)
# 0x200x10 -> (512, 16)
def bdgeom(s):
    s = s.strip()
    b = 10
    if s.startswith('0x') or s.startswith('0X'):
        s = s[2:]
        b = 16
    elif s.startswith('0o') or s.startswith('0O'):
        s = s[2:]
        b = 8
    elif s.startswith('0b') or s.startswith('0B'):
        s = s[2:]
        b = 2

    if 'x' in s:
        s, s_ = s.split('x', 1)
        return (int(s, b), int(s_, b))
    else:
        return int(s, b)

# parse some rbyd addr encodings
# 0xa       -> (0xa,)
# 0xa.c     -> ((0xa, 0xc),)
# 0x{a,b}   -> (0xa, 0xb)
# 0x{a,b}.c -> ((0xa, 0xc), (0xb, 0xc))
def rbydaddr(s):
    s = s.strip()
    b = 10
    if s.startswith('0x') or s.startswith('0X'):
        s = s[2:]
        b = 16
    elif s.startswith('0o') or s.startswith('0O'):
        s = s[2:]
        b = 8
    elif s.startswith('0b') or s.startswith('0B'):
        s = s[2:]
        b = 2

    trunk = None
    if '.' in s:
        s, s_ = s.split('.', 1)
        trunk = int(s_, b)

    if s.startswith('{') and '}' in s:
        ss = s[1:s.find('}')].split(',')
    else:
        ss = [s]

    addr = []
    for s in ss:
        if trunk is not None:
            addr.append((int(s, b), trunk))
        else:
            addr.append(int(s, b))

    return tuple(addr)

def crc32c(data, crc=0):
    if crc32c_lib is not None:
        return crc32c_lib.crc32c(data, crc)
    else:
        crc ^= 0xffffffff
        for b in data:
            crc ^= b
            for j in range(8):
                crc = (crc >> 1) ^ ((crc & 1) * 0x82f63b78)
        return 0xffffffff ^ crc

def popc(x):
    return bin(x).count('1')

def parity(x):
    return popc(x) & 1

def fromle32(data, j=0):
    return struct.unpack('<I', data[j:j+4].ljust(4, b'\0'))[0]

def fromleb128(data, j=0):
    word = 0
    d = 0
    while j+d < len(data):
        b = data[j+d]
        word |= (b & 0x7f) << 7*d
        word &= 0xffffffff
        if not b & 0x80:
            return word, d+1
        d += 1
    return word, d

def fromtag(data, j=0):
    d = 0
    tag = struct.unpack('>H', data[j:j+2].ljust(2, b'\0'))[0]; d += 2
    weight, d_ = fromleb128(data, j+d); d += d_
    size, d_ = fromleb128(data, j+d); d += d_
    return tag>>15, tag&0x7fff, weight, size, d

def frombranch(data, j=0):
    d = 0
    block, d_ = fromleb128(data, j+d); d += d_
    trunk, d_ = fromleb128(data, j+d); d += d_
    cksum = fromle32(data, j+d); d += 4
    return block, trunk, cksum, d

def xxd(data, width=16):
    for i in range(0, len(data), width):
        yield '%-*s %-*s' % (
                3*width,
                ' '.join('%02x' % b for b in data[i:i+width]),
                width,
                ''.join(
                    b if b >= ' ' and b <= '~' else '.'
                        for b in map(chr, data[i:i+width])))

# human readable tag repr
def tagrepr(tag, weight=None, size=None, *,
        global_=False,
        toff=None):
    # null tags
    if (tag & 0x6fff) == TAG_NULL:
        return '%snull%s%s' % (
                'shrub' if tag & TAG_SHRUB else '',
                ' w%d' % weight if weight else '',
                ' %d' % size if size else '')
    # config tags
    elif (tag & 0x6f00) == TAG_CONFIG:
        return '%s%s%s%s' % (
                'shrub' if tag & TAG_SHRUB else '',
                'magic' if (tag & 0xfff) == TAG_MAGIC
                    else 'version' if (tag & 0xfff) == TAG_VERSION
                    else 'rcompat' if (tag & 0xfff) == TAG_RCOMPAT
                    else 'wcompat' if (tag & 0xfff) == TAG_WCOMPAT
                    else 'ocompat' if (tag & 0xfff) == TAG_OCOMPAT
                    else 'geometry' if (tag & 0xfff) == TAG_GEOMETRY
                    else 'namelimit' if (tag & 0xfff) == TAG_NAMELIMIT
                    else 'filelimit' if (tag & 0xfff) == TAG_FILELIMIT
                    else 'config 0x%02x' % (tag & 0xff),
                ' w%d' % weight if weight else '',
                ' %s' % size if size is not None else '')
    # global-state delta tags
    elif (tag & 0x6f00) == TAG_GDELTA:
        if global_:
            return '%s%s%s%s' % (
                    'shrub' if tag & TAG_SHRUB else '',
                    'grm' if (tag & 0xfff) == TAG_GRMDELTA
                        else 'gstate 0x%02x' % (tag & 0xff),
                    ' w%d' % weight if weight else '',
                    ' %s' % size if size is not None else '')
        else:
            return '%s%s%s%s' % (
                    'shrub' if tag & TAG_SHRUB else '',
                    'grmdelta' if (tag & 0xfff) == TAG_GRMDELTA
                        else 'gdelta 0x%02x' % (tag & 0xff),
                    ' w%d' % weight if weight else '',
                    ' %s' % size if size is not None else '')
    # name tags, includes file types
    elif (tag & 0x6f00) == TAG_NAME:
        return '%s%s%s%s' % (
                'shrub' if tag & TAG_SHRUB else '',
                'bname' if (tag & 0xfff) == TAG_BNAME
                    else 'reg' if (tag & 0xfff) == TAG_REG
                    else 'dir' if (tag & 0xfff) == TAG_DIR
                    else 'stickynote' if (tag & 0xfff) == TAG_STICKYNOTE
                    else 'bookmark' if (tag & 0xfff) == TAG_BOOKMARK
                    else 'mname' if (tag & 0xfff) == TAG_MNAME
                    else 'name 0x%02x' % (tag & 0xff),
                ' w%d' % weight if weight else '',
                ' %s' % size if size is not None else '')
    # structure tags
    elif (tag & 0x6f00) == TAG_STRUCT:
        return '%s%s%s%s' % (
                'shrub' if tag & TAG_SHRUB else '',
                'branch' if (tag & 0xfff) == TAG_BRANCH
                    else 'data' if (tag & 0xfff) == TAG_DATA
                    else 'block' if (tag & 0xfff) == TAG_BLOCK
                    else 'did' if (tag & 0xfff) == TAG_DID
                    else 'bshrub' if (tag & 0xfff) == TAG_BSHRUB
                    else 'btree' if (tag & 0xfff) == TAG_BTREE
                    else 'mroot' if (tag & 0xfff) == TAG_MROOT
                    else 'mdir' if (tag & 0xfff) == TAG_MDIR
                    else 'mtree' if (tag & 0xfff) == TAG_MTREE
                    else 'struct 0x%02x' % (tag & 0xff),
                ' w%d' % weight if weight else '',
                ' %s' % size if size is not None else '')
    # custom attributes
    elif (tag & 0x6e00) == TAG_ATTR:
        return '%s%sattr 0x%02x%s%s' % (
                'shrub' if tag & TAG_SHRUB else '',
                's' if tag & 0x100 else 'u',
                ((tag & 0x100) >> 1) + (tag & 0xff),
                ' w%d' % weight if weight else '',
                ' %s' % size if size is not None else '')
    # alt pointers
    elif tag & TAG_ALT:
        return 'alt%s%s 0x%03x%s%s' % (
                'r' if tag & TAG_R else 'b',
                'gt' if tag & TAG_GT else 'le',
                tag & 0x0fff,
                ' w%d' % weight if weight is not None else '',
                ' 0x%x' % (0xffffffff & (toff-size))
                    if size and toff is not None
                    else ' -%d' % size if size
                    else '')
    # checksum tags
    elif (tag & 0x7f00) == TAG_CKSUM:
        return 'cksum%s%s%s%s%s' % (
                'q%d' % (tag & 0x3),
                'p' if tag & TAG_PERTURB else '',
                ' 0x%02x' % (tag & 0xff) if tag & 0xf8 else '',
                ' w%d' % weight if weight else '',
                ' %s' % size if size is not None else '')
    # note tags
    elif (tag & 0x7f00) == TAG_NOTE:
        return 'note%s%s%s' % (
                ' 0x%02x' % (tag & 0xff) if tag & 0xff else '',
                ' w%d' % weight if weight else '',
                ' %s' % size if size is not None else '')
    # erased-state checksum tags
    elif (tag & 0x7f00) == TAG_ECKSUM:
        return 'ecksum%s%s%s' % (
                ' 0x%02x' % (tag & 0xff) if tag & 0xff else '',
                ' w%d' % weight if weight else '',
                ' %s' % size if size is not None else '')
    # global-checksum delta tags
    elif (tag & 0x7f00) == TAG_GCKSUMDELTA:
        if global_:
            return 'gcksum%s%s%s' % (
                    ' 0x%02x' % (tag & 0xff) if tag & 0xff else '',
                    ' w%d' % weight if weight else '',
                    ' %s' % size if size is not None else '')
        else:
            return 'gcksumdelta%s%s%s' % (
                    ' 0x%02x' % (tag & 0xff) if tag & 0xff else '',
                    ' w%d' % weight if weight else '',
                    ' %s' % size if size is not None else '')
    # unknown tags
    else:
        return '0x%04x%s%s' % (
                tag,
                ' w%d' % weight if weight is not None else '',
                ' %d' % size if size is not None else '')

# compute the difference between two paths, returning everything
# in a after the paths diverge, as well as the relevant index
def pathdelta(a, b):
    if not isinstance(a, list):
        a = list(a)
    i = 0
    for a_, b_ in zip(a, b):
        try:
            if type(a_) == type(b_) and a_ == b_:
                i += 1
            else:
                break
        # treat exceptions here as failure to match, most likely
        # the compared types are incompatible, it's the caller's
        # problem
        except Exception:
            break

    return [(i+j, a_) for j, a_ in enumerate(a[i:])]


# a simple wrapper over an open file with bd geometry
class Bd:
    def __init__(self, f, block_size=None, block_count=None):
        self.f = f
        self.block_size = block_size
        self.block_count = block_count

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__, self.repr())

    def repr(self):
        return 'bd %sx%s' % (self.block_size, self.block_count)

    def read(self, block, off, size):
        self.f.seek(block*self.block_size + off)
        return self.f.read(size)

    def readblock(self, block):
        self.f.seek(block*self.block_size)
        return self.f.read(self.block_size)

# tagged data in an rbyd
class Rattr:
    def __init__(self, tag, weight, blocks, toff, tdata, data):
        self.tag = tag
        self.weight = weight
        if isinstance(blocks, int):
            self.blocks = (blocks,)
        else:
            self.blocks = blocks
        self.toff = toff
        self.tdata = tdata
        self.data = data

    @property
    def block(self):
        return self.blocks[0]

    @property
    def tsize(self):
        return len(self.tdata)

    @property
    def off(self):
        return self.toff + len(self.tdata)

    @property
    def size(self):
        return len(self.data)

    def __bytes__(self):
        return self.data

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__, self.repr())

    def repr(self):
        return tagrepr(self.tag, self.weight, self.size)

    def __iter__(self):
        return iter((self.tag, self.weight, self.data))

    def __eq__(self, other):
        return ((self.tag, self.weight, self.data)
                == (other.tag, other.weight, other.data))

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((self.tag, self.weight, self.data))

    # convenience for did/name access
    def _parse_name(self):
        # note we return a null name for non-name tags, this is so
        # vestigial names in btree nodes act as a catch-all
        if (self.tag & 0xff00) != TAG_NAME:
            did = 0
            name = b''
        else:
            did, d = fromleb128(self.data)
            name = self.data[d:]

        # cache both
        self.did = did
        self.name = name

    @ft.cached_property
    def did(self):
        self._parse_name()
        return self.did

    @ft.cached_property
    def name(self):
        self._parse_name()
        return self.name

class Ralt:
    def __init__(self, tag, weight, blocks, toff, tdata, jump,
            color=None, followed=None):
        self.tag = tag
        self.weight = weight
        if isinstance(blocks, int):
            self.blocks = (blocks,)
        else:
            self.blocks = blocks
        self.toff = toff
        self.tdata = tdata
        self.jump = jump

        if color is not None:
            self.color = color
        else:
            self.color = 'r' if tag & TAG_R else 'b'
        self.followed = followed

    @property
    def block(self):
        return self.blocks[0]

    @property
    def tsize(self):
        return len(self.tdata)

    @property
    def off(self):
        return self.toff + len(self.tdata)

    @property
    def joff(self):
        return self.toff - self.jump

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__, self.repr())

    def repr(self):
        return tagrepr(self.tag, self.weight, self.jump, toff=self.toff)

    def __iter__(self):
        return iter((self.tag, self.weight, self.jump))

    def __eq__(self, other):
        return ((self.tag, self.weight, self.jump)
                == (other.tag, other.weight, other.jump))

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((self.tag, self.weight, self.jump))


# our core rbyd type
class Rbyd:
    def __init__(self, blocks, trunk, weight, rev, eoff, cksum, data, *,
            shrub=False,
            gcksumdelta=None,
            redund=0):
        if isinstance(blocks, int):
            self.blocks = (blocks,)
        else:
            self.blocks = blocks
        self.trunk = trunk
        self.weight = weight
        self.rev = rev
        self.eoff = eoff
        self.cksum = cksum
        self.data = data

        self.shrub = shrub
        self.gcksumdelta = gcksumdelta
        self.redund = redund

    @property
    def block(self):
        return self.blocks[0]

    @property
    def corrupt(self):
        # use redund=-1 to indicate corrupt rbyds
        return self.redund >= 0

    def addr(self):
        if len(self.blocks) == 1:
            return '0x%x.%x' % (self.block, self.trunk)
        else:
            return '0x{%s}.%x' % (
                    ','.join('%x' % block for block in self.blocks),
                    self.trunk)

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__, self.repr())

    def repr(self):
        return 'rbyd %s w%s' % (self.addr(), self.weight)

    def __bool__(self):
        # use redund=-1 to indicate corrupt rbyds
        return self.redund >= 0

    def __eq__(self, other):
        return ((frozenset(self.blocks), self.trunk)
                == (frozenset(other.blocks), other.trunk))

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((frozenset(self.blocks), self.trunk))

    @classmethod
    def _fetch(cls, data, block, trunk=None):
        # fetch the rbyd
        rev = fromle32(data, 0)
        cksum = 0
        cksum_ = crc32c(data[0:4])
        cksum__ = cksum_
        perturb = False
        eoff = 0
        eoff_ = None
        j_ = 4
        trunk_ = 0
        trunk__ = 0
        trunk___ = 0
        weight = 0
        weight_ = 0
        weight__ = 0
        gcksumdelta = None
        gcksumdelta_ = None
        while j_ < len(data) and (not trunk or eoff <= trunk):
            # read next tag
            v, tag, w, size, d = fromtag(data, j_)
            if v != parity(cksum__):
                break
            cksum__ ^= 0x00000080 if v else 0
            cksum__ = crc32c(data[j_:j_+d], cksum__)
            j_ += d
            if not tag & TAG_ALT and j_ + size > len(data):
                break

            # take care of cksums
            if not tag & TAG_ALT:
                if (tag & 0xff00) != TAG_CKSUM:
                    cksum__ = crc32c(data[j_:j_+size], cksum__)

                    # found a gcksumdelta?
                    if (tag & 0xff00) == TAG_GCKSUMDELTA:
                        gcksumdelta_ = Rattr(tag, w, block, j_-d,
                                data[j_-d:j_],
                                data[j_:j_+size])

                # found a cksum?
                else:
                    # check cksum
                    cksum___ = fromle32(data, j_)
                    if cksum__ != cksum___:
                        break
                    # commit what we have
                    eoff = eoff_ if eoff_ else j_ + size
                    cksum = cksum_
                    trunk_ = trunk__
                    weight = weight_
                    gcksumdelta = gcksumdelta_
                    gcksumdelta_ = None
                    # update perturb bit
                    perturb = bool(tag & TAG_PERTURB)
                    # revert to data cksum and perturb
                    cksum__ = cksum_ ^ (0xfca42daf if perturb else 0)

            # evaluate trunks
            if (tag & 0xf000) != TAG_CKSUM:
                if not (trunk and j_-d > trunk and not trunk___):
                    # new trunk?
                    if not trunk___:
                        trunk___ = j_-d
                        weight__ = 0

                    # keep track of weight
                    weight__ += w

                    # end of trunk?
                    if not tag & TAG_ALT:
                        # update trunk/weight unless we found a shrub or an
                        # explicit trunk (which may be a shrub) is requested
                        if not tag & TAG_SHRUB or trunk___ == trunk:
                            trunk__ = trunk___
                            weight_ = weight__
                            # keep track of eoff for best matching trunk
                            if trunk and j_ + size > trunk:
                                eoff_ = j_ + size
                                eoff = eoff_
                                cksum = cksum__ ^ (
                                        0xfca42daf if perturb else 0)
                                trunk_ = trunk__
                                weight = weight_
                                gcksumdelta = gcksumdelta_
                        trunk___ = 0

                # update canonical checksum, xoring out any perturb state
                cksum_ = cksum__ ^ (0xfca42daf if perturb else 0)

            if not tag & TAG_ALT:
                j_ += size

        return cls(block, trunk_, weight, rev, eoff, cksum, data,
                gcksumdelta=gcksumdelta,
                redund=0 if trunk_ else -1)

    @classmethod
    def fetch(cls, bd, blocks, trunk=None):
        # multiple blocks?
        if not isinstance(blocks, int):
            # fetch all blocks
            rbyds = [cls.fetch(bd, block, trunk) for block in blocks]

            # determine most recent revision/trunk
            rev, trunk = None, None
            for rbyd in rbyds:
                # compare with sequence arithmetic
                if rbyd and (
                        rev is None
                            or not ((rbyd.rev - rev) & 0x80000000)
                            or (rbyd.rev == rev and rbyd.trunk > trunk)):
                    rev, trunk = rbyd.rev, rbyd.trunk
            # sort for reproducibility
            rbyds.sort(key=lambda rbyd: (
                    # prioritize valid redund blocks
                    0 if rbyd and rbyd.rev == rev and rbyd.trunk == trunk
                        else 1,
                    # default to sorting by block
                    rbyd.block))

            # choose an active rbyd
            rbyd = rbyds[0]
            # keep track of the other blocks
            rbyd.blocks = tuple(rbyd.block for rbyd in rbyds)
            # keep track of how many redund blocks are valid
            rbyd.redund = -1 + sum(1 for rbyd in rbyds
                    if rbyd and rbyd.rev == rev and rbyd.trunk == trunk)
            # and patch the gcksumdelta if we have one
            if rbyd.gcksumdelta is not None:
                rbyd.gcksumdelta.blocks = rbyd.blocks
            return rbyd

        # seek/read the block
        block = blocks
        data = bd.readblock(block)

        # fetch the rbyd
        return cls._fetch(data, block, trunk)

    @classmethod
    def fetchck(cls, bd, blocks, trunk, weight, cksum):
        # try to fetch the rbyd normally
        rbyd = cls.fetch(bd, blocks, trunk)

        # cksum mismatch? trunk/weight mismatch?
        if (rbyd.cksum != cksum
                or rbyd.trunk != trunk
                or rbyd.weight != weight):
            # mark as corrupt and keep track of expected trunk/weight
            rbyd.redund = -1
            rbyd.trunk = trunk
            rbyd.weight = weight

        return rbyd

    @classmethod
    def fetchshrub(cls, rbyd, trunk):
        # steal the original rbyd's data
        #
        # this helps avoid race conditions with cksums and stuff
        shrub = cls._fetch(rbyd.data, rbyd.block, trunk)
        shrub.blocks = rbyd.blocks
        shrub.shrub = True
        return shrub

    def lookupnext(self, rid, tag=None, *,
            path=False):
        if not self or rid >= self.weight:
            if path:
                return None, None, []
            else:
                return None, None

        tag = max(tag or 0, 0x1)
        lower = 0
        upper = self.weight
        path_ = []

        # descend down tree
        j = self.trunk
        while True:
            _, alt, w, jump, d = fromtag(self.data, j)

            # found an alt?
            if alt & TAG_ALT:
                # follow?
                if ((rid, tag & 0xfff) > (upper-w-1, alt & 0xfff)
                        if alt & TAG_GT
                        else ((rid, tag & 0xfff)
                            <= (lower+w-1, alt & 0xfff))):
                    lower += upper-lower-w if alt & TAG_GT else 0
                    upper -= upper-lower-w if not alt & TAG_GT else 0
                    j = j - jump

                    if path:
                        # figure out which color
                        if alt & TAG_R:
                            _, nalt, _, _, _ = fromtag(self.data, j+jump+d)
                            if nalt & TAG_R:
                                color = 'y'
                            else:
                                color = 'r'
                        else:
                            color = 'b'

                        path_.append(Ralt(
                                alt, w, self.blocks, j+jump,
                                self.data[j+jump:j+jump+d], jump,
                                color=color,
                                followed=True))

                # stay on path
                else:
                    lower += w if not alt & TAG_GT else 0
                    upper -= w if alt & TAG_GT else 0
                    j = j + d

                    if path:
                        # figure out which color
                        if alt & TAG_R:
                            _, nalt, _, _, _ = fromtag(self.data, j)
                            if nalt & TAG_R:
                                color = 'y'
                            else:
                                color = 'r'
                        else:
                            color = 'b'

                        path_.append(Ralt(
                                alt, w, self.blocks, j-d,
                                self.data[j-d:j], jump,
                                color=color,
                                followed=False))

            # found tag
            else:
                rid_ = upper-1
                tag_ = alt
                w_ = upper-lower

                if not tag_ or (rid_, tag_) < (rid, tag):
                    if path:
                        return None, None, path_
                    else:
                        return None, None

                rattr_ = Rattr(tag_, w_, self.blocks, j,
                        self.data[j:j+d],
                        self.data[j+d:j+d+jump])
                if path:
                    return rid_, rattr_, path_
                else:
                    return rid_, rattr_

    def lookup(self, rid, tag=None, mask=None, *,
            path=False):
        if tag is None:
            tag, mask = 0, 0xffff
        if mask is None:
            mask = 0

        r = self.lookupnext(rid, tag & ~mask,
                path=path)
        if path:
            rid_, rattr_, path_ = r
        else:
            rid_, rattr_ = r
        if (rid_ is None
                or rid_ != rid
                or (rattr_.tag & ~mask & 0xfff)
                    != (tag & ~mask & 0xfff)):
            if path:
                return None, path_
            else:
                return None

        if path:
            return rattr_, path_
        else:
            return rattr_

    def rids(self, *,
            path=False):
        rid = -1
        while True:
            r = self.lookupnext(rid,
                    path=path)
            if path:
                rid, name, path_ = r
            else:
                rid, name = r
            # found end of tree?
            if rid is None:
                break

            if path:
                yield rid, name, path_
            else:
                yield rid, name
            rid += 1

    def rattrs(self, rid=None, tag=None, mask=None, *,
            path=False):
        if rid is None:
            rid, tag = -1, 0
            while True:
                r = self.lookupnext(rid, tag+0x1,
                        path=path)
                if path:
                    rid, rattr, path_ = r
                else:
                    rid, rattr = r
                # found end of tree?
                if rid is None:
                    break

                if path:
                    yield rid, rattr, path_
                else:
                    yield rid, rattr
                tag = rattr.tag
        else:
            if tag is None:
                tag, mask = 0, 0xffff
            if mask is None:
                mask = 0

            tag_ = max((tag & ~mask) - 1, 0)
            while True:
                r = self.lookupnext(rid, tag_+0x1,
                        path=path)
                if path:
                    rid_, rattr_, path_ = r
                else:
                    rid_, rattr_ = r
                # found end of tree?
                if (rid_ is None
                        or rid_ != rid
                        or (rattr_.tag & ~mask & 0xfff)
                            != (tag & ~mask & 0xfff)):
                    break

                if path:
                    yield rattr_, path_
                else:
                    yield rattr_
                tag_ = rattr_.tag

    # lookup by name
    def namelookup(self, did, name):
        # binary search
        best = None, None
        lower = 0
        upper = self.weight
        while lower < upper:
            rid, name_ = self.lookupnext(
                lower + (upper-1-lower)//2)
            if rid is None:
                break

            # bisect search space
            if (name_.did, name_.name) > (did, name):
                upper = rid-(name_.weight-1)
            elif (name_.did, name_.name) < (did, name):
                lower = rid + 1
                # keep track of best match
                best = rid, name_
            else:
                # found a match
                return rid, name_

        return best


# our rbyd btree type
class Btree:
    def __init__(self, bd, rbyd):
        self.bd = bd
        self.rbyd = rbyd

    @property
    def block(self):
        return self.rbyd.block

    @property
    def blocks(self):
        return self.rbyd.blocks

    @property
    def trunk(self):
        return self.rbyd.trunk

    @property
    def weight(self):
        return self.rbyd.weight

    @property
    def rev(self):
        return self.rbyd.rev

    @property
    def cksum(self):
        return self.rbyd.cksum

    @property
    def shrub(self):
        return self.rbyd.shrub

    def addr(self):
        return self.rbyd.addr()

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__, self.repr())

    def repr(self):
        return 'btree %s w%s' % (self.addr(), self.weight)

    def __eq__(self, other):
        return self.rbyd == other.rbyd

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash(self.rbyd)

    @classmethod
    def fetch(cls, bd, blocks, trunk=None):
        # rbyd fetch does most of the work here
        rbyd = Rbyd.fetch(bd, blocks, trunk)
        return cls(bd, rbyd)

    @classmethod
    def fetchck(cls, bd, blocks, trunk, weight, cksum):
        # rbyd fetchck does most of the work here
        rbyd = Rbyd.fetchck(bd, blocks, trunk, weight, cksum)
        return cls(bd, rbyd)

    @classmethod
    def fetchshrub(cls, bd, rbyd, trunk):
        shrub = Rbyd.fetchshrub(rbyd, trunk)
        return cls(bd, shrub)

    def lookupleaf(self, bid, *,
            path=False,
            depth=None):
        if not self or bid >= self.weight:
            if path:
                return None, None, None, None, []
            else:
                return None, None, None, None

        rbyd = self.rbyd
        rid = bid
        depth_ = 1
        path_ = []

        while True:
            # corrupt branch?
            if not rbyd:
                if path:
                    return bid, rbyd, rid, None, path_
                else:
                    return bid, rbyd, rid, None

            # first tag indicates the branch's weight
            rid_, name_ = rbyd.lookupnext(rid)
            if rid_ is None:
                if path:
                    return None, None, None, None, path_
                else:
                    return None, None, None, None

            # keep track of path
            if path:
                path_.append((bid + (rid_-rid), rbyd, rid_, name_))

            # find branch tag if there is one
            branch_ = rbyd.lookup(rid_, TAG_BRANCH, 0x3)

            # descend down branch?
            if branch_ is not None and (
                    not depth or depth_ < depth):
                block, trunk, cksum, _ = frombranch(branch_.data)
                rbyd = Rbyd.fetchck(self.bd, block, trunk, name_.weight,
                        cksum)

                rid -= (rid_-(name_.weight-1))
                depth_ += 1

            else:
                if path:
                    return bid + (rid_-rid), rbyd, rid_, name_, path_
                else:
                    return bid + (rid_-rid), rbyd, rid_, name_

    # the non-leaf variants discard the rbyd info, these can be a bit
    # more convenient, but at a performance cost
    def lookupnext(self, bid, *,
            path=False,
            depth=None):
        # just discard the rbyd info
        r = self.lookupleaf(bid,
                path=path,
                depth=depth)
        if path:
            bid, rbyd, rid, name, path_ = r
        else:
            bid, rbyd, rid, name = r

        if path:
            return bid, name, path_
        else:
            return bid, name

    def lookup(self, bid, tag=None, mask=None, *,
            path=False,
            depth=None):
        # lookup rbyd in btree
        #
        # note this function expects bid to be known, use lookupnext
        # first if you don't care about the exact bid (or better yet,
        # lookupleaf and call lookup on the returned rbyd)
        #
        # this matches rbyd's lookup behavior, which needs a known rid
        # to avoid a double lookup
        r = self.lookupleaf(bid,
                path=path,
                depth=depth)
        if path:
            bid_, rbyd_, rid_, name_, path_ = r
        else:
            bid_, rbyd_, rid_, name_ = r
        if bid_ is None or bid_ != bid:
            if path:
                return None, path_
            else:
                return None

        # lookup tag in rbyd
        rattr_ = rbyd_.lookup(rid_, tag, mask)
        if rattr_ is None:
            if path:
                return None, path_
            else:
                return None

        if path:
            return rattr_, path_
        else:
            return rattr_

    # note leaves only iterates over leaf rbyds, whereas traverse
    # traverses all rbyds
    def leaves(self, *,
            path=False,
            depth=None):
        # include our root rbyd even if the weight is zero
        if self.weight == 0:
            if path:
                yield -1, self.rbyd, []
            else:
                yield -1, self.rbyd
            return

        bid = 0
        while True:
            r = self.lookupleaf(bid,
                    path=path,
                    depth=depth)
            if r:
                bid, rbyd, rid, name, path_ = r
            else:
                bid, rbyd, rid, name = r
            if bid is None:
                break

            if path:
                yield (bid-rid + (rbyd.weight-1), rbyd,
                        # path tail is usually redundant unless corrupt
                        path_[:-1]
                            if path_ and path_[-1][1] == rbyd
                            else path_)
            else:
                yield bid-rid + (rbyd.weight-1), rbyd
            bid += rbyd.weight - rid + 1

    def traverse(self, *,
            path=False,
            depth=None):
        ptrunk_ = []
        for bid, rbyd, path_ in self.leaves(
                path=True,
                depth=depth):
            # we only care about the rbyds here
            trunk_ = ([(bid_-rid_ + (rbyd_.weight-1), rbyd_)
                        for bid_, rbyd_, rid_, name_ in path_]
                    + [(bid, rbyd)])
            for d, (bid_, rbyd_) in pathdelta(
                    trunk_, ptrunk_):
                # but include branch rids in the path if requested
                if path:
                    yield bid_, rbyd_, path_[:d]
                else:
                    yield bid_, rbyd_
            ptrunk_ = trunk_

    # note bids/rattrs do _not_ include corrupt btree nodes!
    def bids(self, *,
            leaves=False,
            path=False,
            depth=None):
        for r in self.leaves(
                path=path,
                depth=depth):
            if path:
                bid, rbyd, path_ = r
            else:
                bid, rbyd = r
            for rid, name in rbyd.rids():
                bid_ = bid-(rbyd.weight-1) + rid
                if leaves:
                    if path:
                        yield (bid_, rbyd, rid, name,
                                path_+[(bid_, rbyd, rid, name)])
                    else:
                        yield bid_, rbyd, rid, name
                else:
                    if path:
                        yield (bid_, name,
                                path_+[(bid_, rbyd, rid, name)])
                    else:
                        yield bid_, name

    def rattrs(self, bid=None, tag=None, mask=None, *,
            leaves=False,
            path=False,
            depth=None):
        if bid is None:
            for r in self.leaves(
                    path=path,
                    depth=depth):
                if path:
                    bid, rbyd, path_ = r
                else:
                    bid, rbyd = r
                for rid, name in rbyd.rids():
                    bid_ = bid-(rbyd.weight-1) + rid
                    for rattr in rbyd.rattrs(rid):
                        if leaves:
                            if path:
                                yield (bid_, rbyd, rid, rattr,
                                        path_+[(bid_, rbyd, rid, name)])
                            else:
                                yield bid_, rbyd, rid, rattr
                        else:
                            if path:
                                yield (bid_, rattr,
                                        path_+[(bid_, rbyd, rid, name)])
                            else:
                                yield bid_, rattr
        else:
            r = self.lookupleaf(bid,
                    path=path,
                    depth=depth)
            if path:
                bid, rbyd, rid, name, path_ = r
            else:
                bid, rbyd, rid, name = r
            if bid is None:
                return

            for rattr in rbyd.rattrs(rid, tag, mask):
                if leaves:
                    if path:
                        yield rbyd, rid, rattr, path_
                    else:
                        yield rbyd, rid, rattr
                else:
                    if path:
                        yield rattr, path_
                    else:
                        yield rattr

    # lookup by name
    def namelookupleaf(self, did, name, *,
            path=False,
            depth=None):
        rbyd = self.rbyd
        bid = 0
        depth_ = 1
        path_ = []

        while True:
            # corrupt branch?
            if not rbyd:
                bid_ = bid+(rbyd.weight-1)
                if path:
                    return bid_, rbyd, rbyd.weight-1, None, path_
                else:
                    return bid_, rbyd, rbyd.weight-1, None

            rid_, name_ = rbyd.namelookup(did, name)

            # keep track of path
            if path:
                path_.append((bid + rid_, rbyd, rid_, name_))

            # find branch tag if there is one
            branch_ = rbyd.lookup(rid_, TAG_BRANCH, 0x3)

            # found another branch
            if branch_ is not None and (
                    not depth or depth_ < depth):
                block, trunk, cksum, _ = frombranch(branch_.data)
                rbyd = Rbyd.fetchck(self.bd, block, trunk, name_.weight,
                        cksum)

                # update our bid
                bid += rid_ - (name_.weight-1)
                depth_ += 1

            # found best match
            else:
                if path:
                    return bid + rid_, rbyd, rid_, name_, path_
                else:
                    return bid + rid_, rbyd, rid_, name_

    def namelookup(self, bid, *,
            path=False,
            depth=None):
        # just discard the rbyd info
        r = self.namelookupleaf(did, name,
                path=path,
                depth=depth)
        if path:
            bid, rbyd, rid, name, path_ = r
        else:
            bid, rbyd, rid, name = r

        if path:
            return bid, name, path_
        else:
            return bid, name


# tree renderer
class TreeArt:
    # tree branches are an abstract thing for tree rendering
    class Branch(co.namedtuple('Branch', ['a', 'b', 'z', 'color'])):
        __slots__ = ()
        def __new__(cls, a, b, z=0, color='b'):
            # a and b are context specific
            return super().__new__(cls, a, b, z, color)

        def __repr__(self):
            return '%s(%s, %s, %s, %s)' % (
                    self.__class__.__name__,
                    self.a,
                    self.b,
                    self.z,
                    self.color)

        # don't include color in branch comparisons, or else our tree
        # renderings can end up with inconsistent colors between runs
        def __eq__(self, other):
            return (self.a, self.b, self.z) == (other.a, other.b, other.z)

        def __ne__(self, other):
            return (self.a, self.b, self.z) != (other.a, other.b, other.z)

        def __hash__(self):
            return hash((self.a, self.b, self.z))

        # also order by z first, which can be useful for reproducibly
        # prioritizing branches when simplifying trees
        def __lt__(self, other):
            return (self.z, self.a, self.b) < (other.z, other.a, other.b)

        def __le__(self, other):
            return (self.z, self.a, self.b) <= (other.z, other.a, other.b)

        def __gt__(self, other):
            return (self.z, self.a, self.b) > (other.z, other.a, other.b)

        def __ge__(self, other):
            return (self.z, self.a, self.b) >= (other.z, other.a, other.b)

        # apply a function to a/b while trying to avoid copies
        def map(self, filter_, map_=None):
            if map_ is None:
                filter_, map_ = None, filter_

            a = self.a
            if filter_ is None or filter_(a):
                a = map_(a)

            b = self.b
            if filter_ is None or filter_(b):
                b = map_(b)

            if a != self.a or b != self.b:
                return self.__class__(
                        a if a != self.a else self.a,
                        b if b != self.b else self.b,
                        self.z,
                        self.color)
            else:
                return self

    def __init__(self, tree):
        self.tree = tree
        self.depth = max((t.z+1 for t in tree), default=0)
        if self.depth > 0:
            self.width = 2*self.depth + 2
        else:
            self.width = 0

    def __iter__(self):
        return iter(self.tree)

    def __bool__(self):
        return bool(self.tree)

    def __len__(self):
        return len(self.tree)

    # render an rbyd rbyd tree for debugging
    @classmethod
    def _fromrbydrtree(cls, rbyd, **args):
        trunks = co.defaultdict(lambda: (-1, 0))
        alts = co.defaultdict(lambda: {})

        for rid, rattr, path in rbyd.rattrs(path=True):
            # keep track of trunks/alts
            trunks[rattr.toff] = (rid, rattr.tag)

            for ralt in path:
                if ralt.followed:
                    alts[ralt.toff] |= {'f': ralt.joff, 'c': ralt.color}
                else:
                    alts[ralt.toff] |= {'nf': ralt.off, 'c': ralt.color}

        if args.get('tree_rbyd'):
            # treat unreachable alts as converging paths
            for j_, alt in alts.items():
                if 'f' not in alt:
                    alt['f'] = alt['nf']
                elif 'nf' not in alt:
                    alt['nf'] = alt['f']

        else:
            # prune any alts with unreachable edges
            pruned = {}
            for j, alt in alts.items():
                if 'f' not in alt:
                    pruned[j] = alt['nf']
                elif 'nf' not in alt:
                    pruned[j] = alt['f']
            for j in pruned.keys():
                del alts[j]

            for j, alt in alts.items():
                while alt['f'] in pruned:
                    alt['f'] = pruned[alt['f']]
                while alt['nf'] in pruned:
                    alt['nf'] = pruned[alt['nf']]

        # find the trunk and depth of each alt
        def rec_trunk(j):
            if j not in alts:
                return trunks[j]
            else:
                if 'nft' not in alts[j]:
                    alts[j]['nft'] = rec_trunk(alts[j]['nf'])
                return alts[j]['nft']

        for j in alts.keys():
            rec_trunk(j)
        for j, alt in alts.items():
            if alt['f'] in alts:
                alt['ft'] = alts[alt['f']]['nft']
            else:
                alt['ft'] = trunks[alt['f']]

        def rec_height(j):
            if j not in alts:
                return 0
            else:
                if 'h' not in alts[j]:
                    alts[j]['h'] = max(
                            rec_height(alts[j]['f']),
                            rec_height(alts[j]['nf'])) + 1
                return alts[j]['h']

        for j in alts.keys():
            rec_height(j)

        t_depth = max((alt['h']+1 for alt in alts.values()), default=0)

        # convert to more general tree representation
        tree = set()
        for j, alt in alts.items():
            # note all non-trunk edges should be colored black
            tree.add(cls.Branch(
                    alt['nft'],
                    alt['nft'],
                    t_depth-1 - alt['h'],
                    alt['c']))
            if alt['ft'] != alt['nft']:
                tree.add(cls.Branch(
                        alt['nft'],
                        alt['ft'],
                        t_depth-1 - alt['h'],
                        'b'))

        return cls(tree)

    # render an rbyd btree tree for debugging
    @classmethod
    def _fromrbydbtree(cls, rbyd, **args):
        # for rbyds this is just a pointer to every rid
        tree = set()
        root = None
        for rid, name in rbyd.rids():
            b = (rid, name.tag)
            if root is None:
                root = b
            tree.add(cls.Branch(root, b))
        return cls(tree)

    # render an rbyd tree for debugging
    @classmethod
    def fromrbyd(cls, rbyd, **args):
        if args.get('tree_btree'):
            return cls._fromrbydbtree(rbyd, **args)
        else:
            return cls._fromrbydrtree(rbyd, **args)

    # render some nice ascii trees
    def repr(self, x, color=False):
        if self.depth == 0:
            return ''

        def branchrepr(tree, x, d, was):
            for t in tree:
                if t.z == d and t.b == x:
                    if any(t.z == d and t.a == x
                            for t in tree):
                        return '+-', t.color, t.color
                    elif any(t.z == d
                                and x > min(t.a, t.b)
                                and x < max(t.a, t.b)
                            for t in tree):
                        return '|-', t.color, t.color
                    elif t.a < t.b:
                        return '\'-', t.color, t.color
                    else:
                        return '.-', t.color, t.color
            for t in tree:
                if t.z == d and t.a == x:
                    return '+ ', t.color, None
            for t in tree:
                if (t.z == d
                        and x > min(t.a, t.b)
                        and x < max(t.a, t.b)):
                    return '| ', t.color, was
            if was:
                return '--', was, was
            return '  ', None, None

        trunk = []
        was = None
        for d in range(self.depth):
            t, c, was = branchrepr(self.tree, x, d, was)

            trunk.append('%s%s%s%s' % (
                    '\x1b[33m' if color and c == 'y'
                        else '\x1b[31m' if color and c == 'r'
                        else '\x1b[1;30m' if color and c == 'b'
                        else '',
                    t,
                    ('>' if was else ' ') if d == self.depth-1 else '',
                    '\x1b[m' if color and c else ''))

        return '%s ' % ''.join(trunk)

# some more renderers

# render a btree rbyd tree for debugging
@classmethod
def _treeartfrombtreertree(cls, btree, *,
        depth=None,
        inner=False,
        **args):
    # precompute rbyd trees so we know the max depth at each layer
    # to nicely align trees
    rtrees = {}
    rdepths = {}
    for bid, rbyd, path in btree.traverse(path=True, depth=depth):
        if not rbyd:
            continue

        rtree = cls.fromrbyd(rbyd, **args)
        rtrees[rbyd] = rtree
        rdepths[len(path)] = max(rdepths.get(len(path), 0), rtree.depth)

    # map rbyd branches into our btree space
    tree = set()
    for bid, rbyd, path in btree.traverse(path=True, depth=depth):
        if not rbyd:
            continue

        # yes we can find new rbyds if disk is being mutated, just
        # ignore these
        if rbyd not in rtrees:
            continue

        rtree = rtrees[rbyd]
        rz = max((t.z+1 for t in rtree), default=0)
        d = sum(rdepths[d]+1 for d in range(len(path)))

        # map into our btree space
        for t in rtree:
            # note we adjust our bid to be left-leaning, this allows
            # a global order and makes tree rendering quite a bit easier
            a_rid, a_tag = t.a
            b_rid, b_tag = t.b
            _, (_, a_w, _) = rbyd.lookupnext(a_rid)
            _, (_, b_w, _) = rbyd.lookupnext(b_rid)
            tree.add(cls.Branch(
                    (bid-(rbyd.weight-1)+a_rid-(a_w-1), len(path), a_tag),
                    (bid-(rbyd.weight-1)+b_rid-(b_w-1), len(path), b_tag),
                    d + rdepths[len(path)]-rz + t.z,
                    t.color))

        # connect rbyd branches to rbyd roots
        if path:
            l_bid, l_rbyd, l_rid, l_name = path[-1]
            l_branch = l_rbyd.lookup(l_rid, TAG_BRANCH, 0x3)

            if rtree:
                r_rid, r_tag = min(rtree, key=lambda t: t.z).a
                _, (_, r_w, _) = rbyd.lookupnext(r_rid)
            else:
                r_rid, (r_tag, r_w, _) = rbyd.lookupnext(-1)

            tree.add(cls.Branch(
                    (l_bid-(l_name.weight-1), len(path)-1, l_branch.tag),
                    (bid-(rbyd.weight-1)+r_rid-(r_w-1), len(path), r_tag),
                    d-1))

    # remap branches to leaves if we aren't showing inner branches
    if not inner:
        # step through each btree layer backwards
        b_depth = max((t.a[1]+1 for t in tree), default=0)

        for d in reversed(range(b_depth-1)):
            # find bid ranges at this level
            bids = set()
            for t in tree:
                if t.b[1] == d:
                    bids.add(t.b[0])
            bids = sorted(bids)

            # find the best root for each bid range
            roots = {}
            for i in range(len(bids)):
                for t in tree:
                    if (t.a[1] > d
                            and t.a[0] >= bids[i]
                            and (i == len(bids)-1 or t.a[0] < bids[i+1])
                            and (bids[i] not in roots
                                or t < roots[bids[i]])):
                        roots[bids[i]] = t

            # remap branches to leaf-roots
            tree = {t.map(
                        lambda x: x[1] == d and x[0] in roots,
                        lambda x: roots[x[0]].a)
                    for t in tree}

    return cls(tree)

# render a btree btree tree for debugging
@classmethod
def _treeartfrombtreebtree(cls, btree, *,
        depth=None,
        inner=False,
        **args):
    # find all branches
    tree = set()
    root = None
    branches = {}
    for bid, name, path in btree.bids(
            path=True,
            depth=depth):
        # create branch for each jump in path
        #
        # note we adjust our bid to be left-leaning, this allows
        # a global order and makes tree rendering quite a bit easier
        a = root
        for d, (bid_, rbyd_, rid_, name_) in enumerate(path):
            # map into our btree space
            bid__ = bid_-(name_.weight-1)
            b = (bid__, d, name_.tag)

            # remap branches to leaves if we aren't showing inner
            # branches
            if not inner:
                if b not in branches:
                    bid_, rbyd_, rid_, name_ = path[-1]
                    bid__ = bid_-(name_.weight-1)
                    branches[b] = (bid__, len(path)-1, name_.tag)
                b = branches[b]

            # render the root path on first rid, this is arbitrary
            if root is None:
                root, a = b, b

            tree.add(cls.Branch(a, b, d))
            a = b

    return cls(tree)

# render a btree tree for debugging
@classmethod
def treeartfrombtree(cls, btree, **args):
    if args.get('tree_btree'):
        return cls._frombtreebtree(btree, **args)
    else:
        return cls._frombtreertree(btree, **args)

TreeArt._frombtreertree = _treeartfrombtreertree
TreeArt._frombtreebtree = _treeartfrombtreebtree
TreeArt.frombtree = treeartfrombtree


def main(disk, roots=None, *,
        trunk=None,
        block_size=None,
        block_count=None,
        quiet=False,
        color='auto',
        **args):
    # figure out what color should be
    if color == 'auto':
        color = sys.stdout.isatty()
    elif color == 'always':
        color = True
    else:
        color = False

    # is bd geometry specified?
    if isinstance(block_size, tuple):
        block_size, block_count_ = block_size
        if block_count is None:
            block_count = block_count_

    # flatten roots, default to block 0
    roots = list(it.chain.from_iterable(roots)) if roots else [0]

    # roots may also encode trunks
    roots, trunk = (
            [block[0] if isinstance(block, tuple)
                    else block
                for block in roots],
            trunk if trunk is not None
                else ft.reduce(
                    lambda x, y: y,
                    (block[1] for block in roots
                        if isinstance(block, tuple)),
                    None))

    # we seek around a bunch, so just keep the disk open
    with open(disk, 'rb') as f:
        # if block_size is omitted, assume the block device is one big block
        if block_size is None:
            f.seek(0, os.SEEK_END)
            block_size = f.tell()

        # fetch the btree
        bd = Bd(f, block_size, block_count)
        btree = Btree.fetch(bd, roots, trunk)

        # print some information about the btree
        if not quiet:
            print('btree %s w%d, rev %08x, cksum %08x' % (
                    btree.addr(),
                    btree.weight,
                    btree.rev,
                    btree.cksum))

        # precompute tree renderings
        t_width = 0
        if (args.get('tree')
                or args.get('tree_rbyd')
                or args.get('tree_btree')):
            treeart = TreeArt.frombtree(btree, **args)
            t_width = treeart.width

        # dynamically size the id field
        w_width = mt.ceil(mt.log10(max(1, btree.weight)+1))

        # prbyd keeps track of the last rendered rbyd, we update
        # in dbg_branch to always print interleaved addresses
        prbyd = None
        def dbg_branch(d, bid, rbyd, rid, name):
            nonlocal prbyd

            # show human-readable representation
            for rattr in rbyd.rattrs(rid):
                print('%10s %s%*s %-*s  %s' % (
                        '%04x.%04x:' % (rbyd.block, rbyd.trunk)
                            if prbyd is None or rbyd != prbyd
                            else '',
                        treeart.repr((bid-(name.weight-1), d, rattr.tag), color)
                            if args.get('tree')
                                or args.get('tree_rbyd')
                                or args.get('tree_btree')
                            else '',
                        2*w_width+1, '%d-%d' % (bid-(rattr.weight-1), bid)
                            if rattr.weight > 1
                            else bid if rattr.weight > 0
                            else '',
                        21+w_width, rattr.repr(),
                        next(xxd(rattr.data, 8), '')
                            if not args.get('raw')
                                and not args.get('no_truncate')
                            else ''))
                prbyd = rbyd

                # show on-disk encoding of tags/data
                if args.get('raw'):
                    for o, line in enumerate(xxd(rattr.tdata)):
                        print('%9s: %*s%*s %s' % (
                                '%04x' % (rattr.toff + o*16),
                                t_width, '',
                                2*w_width+1, '',
                                line))
                if args.get('raw') or args.get('no_truncate'):
                    for o, line in enumerate(xxd(rattr.data)):
                        print('%9s: %*s%*s %s' % (
                                '%04x' % (rattr.off + o*16),
                                t_width, '',
                                2*w_width+1, '',
                                line))

        # traverse and print entries
        ppath = []
        corrupted = False
        for bid, rbyd, path in btree.leaves(
                path=True,
                depth=args.get('depth')):
            # print inner branches if requested
            if args.get('inner') and not quiet:
                for d, (bid_, rbyd_, rid_, name_) in pathdelta(
                        path, ppath):
                    dbg_branch(d, bid_, rbyd_, rid_, name_)
            ppath = path

            # corrupted? try to keep printing the tree
            if not rbyd:
                if not quiet:
                    print('%s%04x.%04x: %*s%s%s' % (
                            '\x1b[31m' if color else '',
                            rbyd.block, rbyd.trunk,
                            t_width, '',
                            '(corrupted rbyd %s)' % rbyd.addr(),
                            '\x1b[m' if color else ''))
                prbyd = None
                corrupted = True
                continue

            if not quiet:
                for rid, name in rbyd.rids():
                    bid_ = bid-(rbyd.weight-1) + rid
                    # show the leaf entry/branch
                    dbg_branch(len(path), bid_, rbyd, rid, name)

    if args.get('error_on_corrupt') and corrupted:
        sys.exit(2)


if __name__ == "__main__":
    import argparse
    import sys
    parser = argparse.ArgumentParser(
            description="Debug rbyd B-trees.",
            allow_abbrev=False)
    parser.add_argument(
            'disk',
            help="File containing the block device.")
    parser.add_argument(
            'roots',
            nargs='*',
            type=rbydaddr,
            help="Block address of the roots of the tree.")
    parser.add_argument(
            '--trunk',
            type=lambda x: int(x, 0),
            help="Use this offset as the trunk of the tree.")
    parser.add_argument(
            '-b', '--block-size',
            type=bdgeom,
            help="Block size/geometry in bytes. Accepts <size>x<count>.")
    parser.add_argument(
            '--block-count',
            type=lambda x: int(x, 0),
            help="Block count in blocks.")
    parser.add_argument(
            '-q', '--quiet',
            action='store_true',
            help="Don't show anything, useful when checking for errors.")
    parser.add_argument(
            '--color',
            choices=['never', 'always', 'auto'],
            default='auto',
            help="When to use terminal colors. Defaults to 'auto'.")
    parser.add_argument(
            '-x', '--raw',
            action='store_true',
            help="Show the raw data including tag encodings.")
    parser.add_argument(
            '-T', '--no-truncate',
            action='store_true',
            help="Don't truncate, show the full contents.")
    parser.add_argument(
            '-t', '--tree',
            action='store_true',
            help="Show the rbyd tree.")
    parser.add_argument(
            '-R', '--tree-rbyd',
            action='store_true',
            help="Show the full rbyd tree.")
    parser.add_argument(
            '-B', '--tree-btree',
            action='store_true',
            help="Show a simplified btree tree.")
    parser.add_argument(
            '-i', '--inner',
            action='store_true',
            help="Show inner branches.")
    parser.add_argument(
            '-z', '--depth',
            nargs='?',
            type=lambda x: int(x, 0),
            const=0,
            help="Depth of the btree to show.")
    parser.add_argument(
            '-e', '--error-on-corrupt',
            action='store_true',
            help="Error if B-tree is corrupt.")
    sys.exit(main(**{k: v
            for k, v in vars(parser.parse_intermixed_args()).items()
            if v is not None}))