#!/usr/bin/env python3 # prevent local imports if __name__ == "__main__": __import__('sys').path.pop(0) import bisect import collections as co import functools as ft import itertools as it import math as mt import os import struct try: import crc32c as crc32c_lib except ModuleNotFoundError: crc32c_lib = None TAG_NULL = 0x0000 ## 0x0000 v--- ---- ---- ---- TAG_CONFIG = 0x0000 ## 0x00tt v--- ---- -ttt tttt TAG_MAGIC = 0x0031 # 0x003r v--- ---- --11 --rr TAG_VERSION = 0x0034 # 0x0034 v--- ---- --11 -1-- TAG_RCOMPAT = 0x0035 # 0x0035 v--- ---- --11 -1-1 TAG_WCOMPAT = 0x0036 # 0x0036 v--- ---- --11 -11- TAG_OCOMPAT = 0x0037 # 0x0037 v--- ---- --11 -111 TAG_GEOMETRY = 0x0038 # 0x0038 v--- ---- --11 1--- TAG_NAMELIMIT = 0x0039 # 0x0039 v--- ---- --11 1--1 TAG_FILELIMIT = 0x003a # 0x003a v--- ---- --11 1-1- TAG_GDELTA = 0x0100 ## 0x01tt v--- ---1 -ttt ttrr TAG_GRMDELTA = 0x0100 # 0x0100 v--- ---1 ---- ---- TAG_NAME = 0x0200 ## 0x02tt v--- --1- -ttt tttt TAG_BNAME = 0x0200 # 0x0200 v--- --1- ---- ---- TAG_REG = 0x0201 # 0x0201 v--- --1- ---- ---1 TAG_DIR = 0x0202 # 0x0202 v--- --1- ---- --1- TAG_STICKYNOTE = 0x0203 # 0x0203 v--- --1- ---- --11 TAG_BOOKMARK = 0x0204 # 0x0204 v--- --1- ---- -1-- TAG_MNAME = 0x0220 # 0x0220 v--- --1- --1- ---- TAG_STRUCT = 0x0300 ## 0x03tt v--- --11 -ttt ttrr TAG_BRANCH = 0x0300 # 0x030r v--- --11 ---- --rr TAG_DATA = 0x0304 # 0x0304 v--- --11 ---- -1-- TAG_BLOCK = 0x0308 # 0x0308 v--- --11 ---- 1err TAG_DID = 0x0314 # 0x0314 v--- --11 ---1 -1-- TAG_BSHRUB = 0x0318 # 0x0318 v--- --11 ---1 1--- TAG_BTREE = 0x031c # 0x031c v--- --11 ---1 11rr TAG_MROOT = 0x0321 # 0x032r v--- --11 --1- --rr TAG_MDIR = 0x0325 # 0x0324 v--- --11 --1- -1rr TAG_MTREE = 0x032c # 0x032c v--- --11 --1- 11rr TAG_ATTR = 0x0400 ## 0x04aa v--- -1-a -aaa aaaa TAG_UATTR = 0x0400 # 0x04aa v--- -1-- -aaa aaaa TAG_SATTR = 0x0500 # 0x05aa v--- -1-1 -aaa aaaa TAG_SHRUB = 0x1000 ## 0x1kkk v--1 kkkk -kkk kkkk TAG_ALT = 0x4000 ## 0x4kkk v1cd kkkk -kkk kkkk TAG_B = 0x0000 TAG_R = 0x2000 TAG_LE = 0x0000 TAG_GT = 0x1000 TAG_CKSUM = 0x3000 ## 0x300p v-11 ---- ---- -pqq TAG_PHASE = 0x0003 TAG_PERTURB = 0x0004 TAG_NOTE = 0x3100 ## 0x3100 v-11 ---1 ---- ---- TAG_ECKSUM = 0x3200 ## 0x3200 v-11 --1- ---- ---- TAG_GCKSUMDELTA = 0x3300 ## 0x3300 v-11 --11 ---- ---- # some ways of block geometry representations # 512 -> 512 # 512x16 -> (512, 16) # 0x200x10 -> (512, 16) def bdgeom(s): s = s.strip() b = 10 if s.startswith('0x') or s.startswith('0X'): s = s[2:] b = 16 elif s.startswith('0o') or s.startswith('0O'): s = s[2:] b = 8 elif s.startswith('0b') or s.startswith('0B'): s = s[2:] b = 2 if 'x' in s: s, s_ = s.split('x', 1) return (int(s, b), int(s_, b)) else: return int(s, b) # parse some rbyd addr encodings # 0xa -> (0xa,) # 0xa.c -> ((0xa, 0xc),) # 0x{a,b} -> (0xa, 0xb) # 0x{a,b}.c -> ((0xa, 0xc), (0xb, 0xc)) def rbydaddr(s): s = s.strip() b = 10 if s.startswith('0x') or s.startswith('0X'): s = s[2:] b = 16 elif s.startswith('0o') or s.startswith('0O'): s = s[2:] b = 8 elif s.startswith('0b') or s.startswith('0B'): s = s[2:] b = 2 trunk = None if '.' in s: s, s_ = s.split('.', 1) trunk = int(s_, b) if s.startswith('{') and '}' in s: ss = s[1:s.find('}')].split(',') else: ss = [s] addr = [] for s in ss: if trunk is not None: addr.append((int(s, b), trunk)) else: addr.append(int(s, b)) return tuple(addr) def crc32c(data, crc=0): if crc32c_lib is not None: return crc32c_lib.crc32c(data, crc) else: crc ^= 0xffffffff for b in data: crc ^= b for j in range(8): crc = (crc >> 1) ^ ((crc & 1) * 0x82f63b78) return 0xffffffff ^ crc def popc(x): return bin(x).count('1') def parity(x): return popc(x) & 1 def fromle32(data, j=0): return struct.unpack('H', data[j:j+2].ljust(2, b'\0'))[0]; d += 2 weight, d_ = fromleb128(data, j+d); d += d_ size, d_ = fromleb128(data, j+d); d += d_ return tag>>15, tag&0x7fff, weight, size, d def frombranch(data, j=0): d = 0 block, d_ = fromleb128(data, j+d); d += d_ trunk, d_ = fromleb128(data, j+d); d += d_ cksum = fromle32(data, j+d); d += 4 return block, trunk, cksum, d def xxd(data, width=16): for i in range(0, len(data), width): yield '%-*s %-*s' % ( 3*width, ' '.join('%02x' % b for b in data[i:i+width]), width, ''.join( b if b >= ' ' and b <= '~' else '.' for b in map(chr, data[i:i+width]))) # human readable tag repr def tagrepr(tag, weight=None, size=None, *, global_=False, toff=None): # null tags if (tag & 0x6fff) == TAG_NULL: return '%snull%s%s' % ( 'shrub' if tag & TAG_SHRUB else '', ' w%d' % weight if weight else '', ' %d' % size if size else '') # config tags elif (tag & 0x6f00) == TAG_CONFIG: return '%s%s%s%s' % ( 'shrub' if tag & TAG_SHRUB else '', 'magic' if (tag & 0xfff) == TAG_MAGIC else 'version' if (tag & 0xfff) == TAG_VERSION else 'rcompat' if (tag & 0xfff) == TAG_RCOMPAT else 'wcompat' if (tag & 0xfff) == TAG_WCOMPAT else 'ocompat' if (tag & 0xfff) == TAG_OCOMPAT else 'geometry' if (tag & 0xfff) == TAG_GEOMETRY else 'namelimit' if (tag & 0xfff) == TAG_NAMELIMIT else 'filelimit' if (tag & 0xfff) == TAG_FILELIMIT else 'config 0x%02x' % (tag & 0xff), ' w%d' % weight if weight else '', ' %s' % size if size is not None else '') # global-state delta tags elif (tag & 0x6f00) == TAG_GDELTA: if global_: return '%s%s%s%s' % ( 'shrub' if tag & TAG_SHRUB else '', 'grm' if (tag & 0xfff) == TAG_GRMDELTA else 'gstate 0x%02x' % (tag & 0xff), ' w%d' % weight if weight else '', ' %s' % size if size is not None else '') else: return '%s%s%s%s' % ( 'shrub' if tag & TAG_SHRUB else '', 'grmdelta' if (tag & 0xfff) == TAG_GRMDELTA else 'gdelta 0x%02x' % (tag & 0xff), ' w%d' % weight if weight else '', ' %s' % size if size is not None else '') # name tags, includes file types elif (tag & 0x6f00) == TAG_NAME: return '%s%s%s%s' % ( 'shrub' if tag & TAG_SHRUB else '', 'bname' if (tag & 0xfff) == TAG_BNAME else 'reg' if (tag & 0xfff) == TAG_REG else 'dir' if (tag & 0xfff) == TAG_DIR else 'stickynote' if (tag & 0xfff) == TAG_STICKYNOTE else 'bookmark' if (tag & 0xfff) == TAG_BOOKMARK else 'mname' if (tag & 0xfff) == TAG_MNAME else 'name 0x%02x' % (tag & 0xff), ' w%d' % weight if weight else '', ' %s' % size if size is not None else '') # structure tags elif (tag & 0x6f00) == TAG_STRUCT: return '%s%s%s%s' % ( 'shrub' if tag & TAG_SHRUB else '', 'branch' if (tag & 0xfff) == TAG_BRANCH else 'data' if (tag & 0xfff) == TAG_DATA else 'block' if (tag & 0xfff) == TAG_BLOCK else 'did' if (tag & 0xfff) == TAG_DID else 'bshrub' if (tag & 0xfff) == TAG_BSHRUB else 'btree' if (tag & 0xfff) == TAG_BTREE else 'mroot' if (tag & 0xfff) == TAG_MROOT else 'mdir' if (tag & 0xfff) == TAG_MDIR else 'mtree' if (tag & 0xfff) == TAG_MTREE else 'struct 0x%02x' % (tag & 0xff), ' w%d' % weight if weight else '', ' %s' % size if size is not None else '') # custom attributes elif (tag & 0x6e00) == TAG_ATTR: return '%s%sattr 0x%02x%s%s' % ( 'shrub' if tag & TAG_SHRUB else '', 's' if tag & 0x100 else 'u', ((tag & 0x100) >> 1) + (tag & 0xff), ' w%d' % weight if weight else '', ' %s' % size if size is not None else '') # alt pointers elif tag & TAG_ALT: return 'alt%s%s 0x%03x%s%s' % ( 'r' if tag & TAG_R else 'b', 'gt' if tag & TAG_GT else 'le', tag & 0x0fff, ' w%d' % weight if weight is not None else '', ' 0x%x' % (0xffffffff & (toff-size)) if size and toff is not None else ' -%d' % size if size else '') # checksum tags elif (tag & 0x7f00) == TAG_CKSUM: return 'cksum%s%s%s%s%s' % ( 'q%d' % (tag & 0x3), 'p' if tag & TAG_PERTURB else '', ' 0x%02x' % (tag & 0xff) if tag & 0xf8 else '', ' w%d' % weight if weight else '', ' %s' % size if size is not None else '') # note tags elif (tag & 0x7f00) == TAG_NOTE: return 'note%s%s%s' % ( ' 0x%02x' % (tag & 0xff) if tag & 0xff else '', ' w%d' % weight if weight else '', ' %s' % size if size is not None else '') # erased-state checksum tags elif (tag & 0x7f00) == TAG_ECKSUM: return 'ecksum%s%s%s' % ( ' 0x%02x' % (tag & 0xff) if tag & 0xff else '', ' w%d' % weight if weight else '', ' %s' % size if size is not None else '') # global-checksum delta tags elif (tag & 0x7f00) == TAG_GCKSUMDELTA: if global_: return 'gcksum%s%s%s' % ( ' 0x%02x' % (tag & 0xff) if tag & 0xff else '', ' w%d' % weight if weight else '', ' %s' % size if size is not None else '') else: return 'gcksumdelta%s%s%s' % ( ' 0x%02x' % (tag & 0xff) if tag & 0xff else '', ' w%d' % weight if weight else '', ' %s' % size if size is not None else '') # unknown tags else: return '0x%04x%s%s' % ( tag, ' w%d' % weight if weight is not None else '', ' %d' % size if size is not None else '') # compute the difference between two paths, returning everything # in a after the paths diverge, as well as the relevant index def pathdelta(a, b): if not isinstance(a, list): a = list(a) i = 0 for a_, b_ in zip(a, b): try: if type(a_) == type(b_) and a_ == b_: i += 1 else: break # treat exceptions here as failure to match, most likely # the compared types are incompatible, it's the caller's # problem except Exception: break return [(i+j, a_) for j, a_ in enumerate(a[i:])] # a simple wrapper over an open file with bd geometry class Bd: def __init__(self, f, block_size=None, block_count=None): self.f = f self.block_size = block_size self.block_count = block_count def __repr__(self): return '<%s %s>' % (self.__class__.__name__, self.repr()) def repr(self): return 'bd %sx%s' % (self.block_size, self.block_count) def read(self, block, off, size): self.f.seek(block*self.block_size + off) return self.f.read(size) def readblock(self, block): self.f.seek(block*self.block_size) return self.f.read(self.block_size) # tagged data in an rbyd class Rattr: def __init__(self, tag, weight, blocks, toff, tdata, data): self.tag = tag self.weight = weight if isinstance(blocks, int): self.blocks = (blocks,) else: self.blocks = blocks self.toff = toff self.tdata = tdata self.data = data @property def block(self): return self.blocks[0] @property def tsize(self): return len(self.tdata) @property def off(self): return self.toff + len(self.tdata) @property def size(self): return len(self.data) def __bytes__(self): return self.data def __repr__(self): return '<%s %s>' % (self.__class__.__name__, self.repr()) def repr(self): return tagrepr(self.tag, self.weight, self.size) def __iter__(self): return iter((self.tag, self.weight, self.data)) def __eq__(self, other): return ((self.tag, self.weight, self.data) == (other.tag, other.weight, other.data)) def __ne__(self, other): return not self.__eq__(other) def __hash__(self): return hash((self.tag, self.weight, self.data)) # convenience for did/name access def _parse_name(self): # note we return a null name for non-name tags, this is so # vestigial names in btree nodes act as a catch-all if (self.tag & 0xff00) != TAG_NAME: did = 0 name = b'' else: did, d = fromleb128(self.data) name = self.data[d:] # cache both self.did = did self.name = name @ft.cached_property def did(self): self._parse_name() return self.did @ft.cached_property def name(self): self._parse_name() return self.name class Ralt: def __init__(self, tag, weight, blocks, toff, tdata, jump, color=None, followed=None): self.tag = tag self.weight = weight if isinstance(blocks, int): self.blocks = (blocks,) else: self.blocks = blocks self.toff = toff self.tdata = tdata self.jump = jump if color is not None: self.color = color else: self.color = 'r' if tag & TAG_R else 'b' self.followed = followed @property def block(self): return self.blocks[0] @property def tsize(self): return len(self.tdata) @property def off(self): return self.toff + len(self.tdata) @property def joff(self): return self.toff - self.jump def __repr__(self): return '<%s %s>' % (self.__class__.__name__, self.repr()) def repr(self): return tagrepr(self.tag, self.weight, self.jump, toff=self.toff) def __iter__(self): return iter((self.tag, self.weight, self.jump)) def __eq__(self, other): return ((self.tag, self.weight, self.jump) == (other.tag, other.weight, other.jump)) def __ne__(self, other): return not self.__eq__(other) def __hash__(self): return hash((self.tag, self.weight, self.jump)) # our core rbyd type class Rbyd: def __init__(self, blocks, trunk, weight, rev, eoff, cksum, data, *, shrub=False, gcksumdelta=None, redund=0): if isinstance(blocks, int): self.blocks = (blocks,) else: self.blocks = blocks self.trunk = trunk self.weight = weight self.rev = rev self.eoff = eoff self.cksum = cksum self.data = data self.shrub = shrub self.gcksumdelta = gcksumdelta self.redund = redund @property def block(self): return self.blocks[0] @property def corrupt(self): # use redund=-1 to indicate corrupt rbyds return self.redund >= 0 def addr(self): if len(self.blocks) == 1: return '0x%x.%x' % (self.block, self.trunk) else: return '0x{%s}.%x' % ( ','.join('%x' % block for block in self.blocks), self.trunk) def __repr__(self): return '<%s %s>' % (self.__class__.__name__, self.repr()) def repr(self): return 'rbyd %s w%s' % (self.addr(), self.weight) def __bool__(self): # use redund=-1 to indicate corrupt rbyds return self.redund >= 0 def __eq__(self, other): return ((frozenset(self.blocks), self.trunk) == (frozenset(other.blocks), other.trunk)) def __ne__(self, other): return not self.__eq__(other) def __hash__(self): return hash((frozenset(self.blocks), self.trunk)) @classmethod def _fetch(cls, data, block, trunk=None): # fetch the rbyd rev = fromle32(data, 0) cksum = 0 cksum_ = crc32c(data[0:4]) cksum__ = cksum_ perturb = False eoff = 0 eoff_ = None j_ = 4 trunk_ = 0 trunk__ = 0 trunk___ = 0 weight = 0 weight_ = 0 weight__ = 0 gcksumdelta = None gcksumdelta_ = None while j_ < len(data) and (not trunk or eoff <= trunk): # read next tag v, tag, w, size, d = fromtag(data, j_) if v != parity(cksum__): break cksum__ ^= 0x00000080 if v else 0 cksum__ = crc32c(data[j_:j_+d], cksum__) j_ += d if not tag & TAG_ALT and j_ + size > len(data): break # take care of cksums if not tag & TAG_ALT: if (tag & 0xff00) != TAG_CKSUM: cksum__ = crc32c(data[j_:j_+size], cksum__) # found a gcksumdelta? if (tag & 0xff00) == TAG_GCKSUMDELTA: gcksumdelta_ = Rattr(tag, w, block, j_-d, data[j_-d:j_], data[j_:j_+size]) # found a cksum? else: # check cksum cksum___ = fromle32(data, j_) if cksum__ != cksum___: break # commit what we have eoff = eoff_ if eoff_ else j_ + size cksum = cksum_ trunk_ = trunk__ weight = weight_ gcksumdelta = gcksumdelta_ gcksumdelta_ = None # update perturb bit perturb = bool(tag & TAG_PERTURB) # revert to data cksum and perturb cksum__ = cksum_ ^ (0xfca42daf if perturb else 0) # evaluate trunks if (tag & 0xf000) != TAG_CKSUM: if not (trunk and j_-d > trunk and not trunk___): # new trunk? if not trunk___: trunk___ = j_-d weight__ = 0 # keep track of weight weight__ += w # end of trunk? if not tag & TAG_ALT: # update trunk/weight unless we found a shrub or an # explicit trunk (which may be a shrub) is requested if not tag & TAG_SHRUB or trunk___ == trunk: trunk__ = trunk___ weight_ = weight__ # keep track of eoff for best matching trunk if trunk and j_ + size > trunk: eoff_ = j_ + size eoff = eoff_ cksum = cksum__ ^ ( 0xfca42daf if perturb else 0) trunk_ = trunk__ weight = weight_ gcksumdelta = gcksumdelta_ trunk___ = 0 # update canonical checksum, xoring out any perturb state cksum_ = cksum__ ^ (0xfca42daf if perturb else 0) if not tag & TAG_ALT: j_ += size return cls(block, trunk_, weight, rev, eoff, cksum, data, gcksumdelta=gcksumdelta, redund=0 if trunk_ else -1) @classmethod def fetch(cls, bd, blocks, trunk=None): # multiple blocks? if not isinstance(blocks, int): # fetch all blocks rbyds = [cls.fetch(bd, block, trunk) for block in blocks] # determine most recent revision/trunk rev, trunk = None, None for rbyd in rbyds: # compare with sequence arithmetic if rbyd and ( rev is None or not ((rbyd.rev - rev) & 0x80000000) or (rbyd.rev == rev and rbyd.trunk > trunk)): rev, trunk = rbyd.rev, rbyd.trunk # sort for reproducibility rbyds.sort(key=lambda rbyd: ( # prioritize valid redund blocks 0 if rbyd and rbyd.rev == rev and rbyd.trunk == trunk else 1, # default to sorting by block rbyd.block)) # choose an active rbyd rbyd = rbyds[0] # keep track of the other blocks rbyd.blocks = tuple(rbyd.block for rbyd in rbyds) # keep track of how many redund blocks are valid rbyd.redund = -1 + sum(1 for rbyd in rbyds if rbyd and rbyd.rev == rev and rbyd.trunk == trunk) # and patch the gcksumdelta if we have one if rbyd.gcksumdelta is not None: rbyd.gcksumdelta.blocks = rbyd.blocks return rbyd # seek/read the block block = blocks data = bd.readblock(block) # fetch the rbyd return cls._fetch(data, block, trunk) @classmethod def fetchck(cls, bd, blocks, trunk, weight, cksum): # try to fetch the rbyd normally rbyd = cls.fetch(bd, blocks, trunk) # cksum mismatch? trunk/weight mismatch? if (rbyd.cksum != cksum or rbyd.trunk != trunk or rbyd.weight != weight): # mark as corrupt and keep track of expected trunk/weight rbyd.redund = -1 rbyd.trunk = trunk rbyd.weight = weight return rbyd @classmethod def fetchshrub(cls, rbyd, trunk): # steal the original rbyd's data # # this helps avoid race conditions with cksums and stuff shrub = cls._fetch(rbyd.data, rbyd.block, trunk) shrub.blocks = rbyd.blocks shrub.shrub = True return shrub def lookupnext(self, rid, tag=None, *, path=False): if not self or rid >= self.weight: if path: return None, None, [] else: return None, None tag = max(tag or 0, 0x1) lower = 0 upper = self.weight path_ = [] # descend down tree j = self.trunk while True: _, alt, w, jump, d = fromtag(self.data, j) # found an alt? if alt & TAG_ALT: # follow? if ((rid, tag & 0xfff) > (upper-w-1, alt & 0xfff) if alt & TAG_GT else ((rid, tag & 0xfff) <= (lower+w-1, alt & 0xfff))): lower += upper-lower-w if alt & TAG_GT else 0 upper -= upper-lower-w if not alt & TAG_GT else 0 j = j - jump if path: # figure out which color if alt & TAG_R: _, nalt, _, _, _ = fromtag(self.data, j+jump+d) if nalt & TAG_R: color = 'y' else: color = 'r' else: color = 'b' path_.append(Ralt( alt, w, self.blocks, j+jump, self.data[j+jump:j+jump+d], jump, color=color, followed=True)) # stay on path else: lower += w if not alt & TAG_GT else 0 upper -= w if alt & TAG_GT else 0 j = j + d if path: # figure out which color if alt & TAG_R: _, nalt, _, _, _ = fromtag(self.data, j) if nalt & TAG_R: color = 'y' else: color = 'r' else: color = 'b' path_.append(Ralt( alt, w, self.blocks, j-d, self.data[j-d:j], jump, color=color, followed=False)) # found tag else: rid_ = upper-1 tag_ = alt w_ = upper-lower if not tag_ or (rid_, tag_) < (rid, tag): if path: return None, None, path_ else: return None, None rattr_ = Rattr(tag_, w_, self.blocks, j, self.data[j:j+d], self.data[j+d:j+d+jump]) if path: return rid_, rattr_, path_ else: return rid_, rattr_ def lookup(self, rid, tag=None, mask=None, *, path=False): if tag is None: tag, mask = 0, 0xffff if mask is None: mask = 0 r = self.lookupnext(rid, tag & ~mask, path=path) if path: rid_, rattr_, path_ = r else: rid_, rattr_ = r if (rid_ is None or rid_ != rid or (rattr_.tag & ~mask & 0xfff) != (tag & ~mask & 0xfff)): if path: return None, path_ else: return None if path: return rattr_, path_ else: return rattr_ def rids(self, *, path=False): rid = -1 while True: r = self.lookupnext(rid, path=path) if path: rid, name, path_ = r else: rid, name = r # found end of tree? if rid is None: break if path: yield rid, name, path_ else: yield rid, name rid += 1 def rattrs(self, rid=None, tag=None, mask=None, *, path=False): if rid is None: rid, tag = -1, 0 while True: r = self.lookupnext(rid, tag+0x1, path=path) if path: rid, rattr, path_ = r else: rid, rattr = r # found end of tree? if rid is None: break if path: yield rid, rattr, path_ else: yield rid, rattr tag = rattr.tag else: if tag is None: tag, mask = 0, 0xffff if mask is None: mask = 0 tag_ = max((tag & ~mask) - 1, 0) while True: r = self.lookupnext(rid, tag_+0x1, path=path) if path: rid_, rattr_, path_ = r else: rid_, rattr_ = r # found end of tree? if (rid_ is None or rid_ != rid or (rattr_.tag & ~mask & 0xfff) != (tag & ~mask & 0xfff)): break if path: yield rattr_, path_ else: yield rattr_ tag_ = rattr_.tag # lookup by name def namelookup(self, did, name): # binary search best = None, None lower = 0 upper = self.weight while lower < upper: rid, name_ = self.lookupnext( lower + (upper-1-lower)//2) if rid is None: break # bisect search space if (name_.did, name_.name) > (did, name): upper = rid-(name_.weight-1) elif (name_.did, name_.name) < (did, name): lower = rid + 1 # keep track of best match best = rid, name_ else: # found a match return rid, name_ return best # our rbyd btree type class Btree: def __init__(self, bd, rbyd): self.bd = bd self.rbyd = rbyd @property def block(self): return self.rbyd.block @property def blocks(self): return self.rbyd.blocks @property def trunk(self): return self.rbyd.trunk @property def weight(self): return self.rbyd.weight @property def rev(self): return self.rbyd.rev @property def cksum(self): return self.rbyd.cksum @property def shrub(self): return self.rbyd.shrub def addr(self): return self.rbyd.addr() def __repr__(self): return '<%s %s>' % (self.__class__.__name__, self.repr()) def repr(self): return 'btree %s w%s' % (self.addr(), self.weight) def __eq__(self, other): return self.rbyd == other.rbyd def __ne__(self, other): return not self.__eq__(other) def __hash__(self): return hash(self.rbyd) @classmethod def fetch(cls, bd, blocks, trunk=None): # rbyd fetch does most of the work here rbyd = Rbyd.fetch(bd, blocks, trunk) return cls(bd, rbyd) @classmethod def fetchck(cls, bd, blocks, trunk, weight, cksum): # rbyd fetchck does most of the work here rbyd = Rbyd.fetchck(bd, blocks, trunk, weight, cksum) return cls(bd, rbyd) @classmethod def fetchshrub(cls, bd, rbyd, trunk): shrub = Rbyd.fetchshrub(rbyd, trunk) return cls(bd, shrub) def lookupleaf(self, bid, *, path=False, depth=None): if not self or bid >= self.weight: if path: return None, None, None, None, [] else: return None, None, None, None rbyd = self.rbyd rid = bid depth_ = 1 path_ = [] while True: # corrupt branch? if not rbyd: if path: return bid, rbyd, rid, None, path_ else: return bid, rbyd, rid, None # first tag indicates the branch's weight rid_, name_ = rbyd.lookupnext(rid) if rid_ is None: if path: return None, None, None, None, path_ else: return None, None, None, None # keep track of path if path: path_.append((bid + (rid_-rid), rbyd, rid_, name_)) # find branch tag if there is one branch_ = rbyd.lookup(rid_, TAG_BRANCH, 0x3) # descend down branch? if branch_ is not None and ( not depth or depth_ < depth): block, trunk, cksum, _ = frombranch(branch_.data) rbyd = Rbyd.fetchck(self.bd, block, trunk, name_.weight, cksum) rid -= (rid_-(name_.weight-1)) depth_ += 1 else: if path: return bid + (rid_-rid), rbyd, rid_, name_, path_ else: return bid + (rid_-rid), rbyd, rid_, name_ # the non-leaf variants discard the rbyd info, these can be a bit # more convenient, but at a performance cost def lookupnext(self, bid, *, path=False, depth=None): # just discard the rbyd info r = self.lookupleaf(bid, path=path, depth=depth) if path: bid, rbyd, rid, name, path_ = r else: bid, rbyd, rid, name = r if path: return bid, name, path_ else: return bid, name def lookup(self, bid, tag=None, mask=None, *, path=False, depth=None): # lookup rbyd in btree # # note this function expects bid to be known, use lookupnext # first if you don't care about the exact bid (or better yet, # lookupleaf and call lookup on the returned rbyd) # # this matches rbyd's lookup behavior, which needs a known rid # to avoid a double lookup r = self.lookupleaf(bid, path=path, depth=depth) if path: bid_, rbyd_, rid_, name_, path_ = r else: bid_, rbyd_, rid_, name_ = r if bid_ is None or bid_ != bid: if path: return None, path_ else: return None # lookup tag in rbyd rattr_ = rbyd_.lookup(rid_, tag, mask) if rattr_ is None: if path: return None, path_ else: return None if path: return rattr_, path_ else: return rattr_ # note leaves only iterates over leaf rbyds, whereas traverse # traverses all rbyds def leaves(self, *, path=False, depth=None): # include our root rbyd even if the weight is zero if self.weight == 0: if path: yield -1, self.rbyd, [] else: yield -1, self.rbyd return bid = 0 while True: r = self.lookupleaf(bid, path=path, depth=depth) if r: bid, rbyd, rid, name, path_ = r else: bid, rbyd, rid, name = r if bid is None: break if path: yield (bid-rid + (rbyd.weight-1), rbyd, # path tail is usually redundant unless corrupt path_[:-1] if path_ and path_[-1][1] == rbyd else path_) else: yield bid-rid + (rbyd.weight-1), rbyd bid += rbyd.weight - rid + 1 def traverse(self, *, path=False, depth=None): ptrunk_ = [] for bid, rbyd, path_ in self.leaves( path=True, depth=depth): # we only care about the rbyds here trunk_ = ([(bid_-rid_ + (rbyd_.weight-1), rbyd_) for bid_, rbyd_, rid_, name_ in path_] + [(bid, rbyd)]) for d, (bid_, rbyd_) in pathdelta( trunk_, ptrunk_): # but include branch rids in the path if requested if path: yield bid_, rbyd_, path_[:d] else: yield bid_, rbyd_ ptrunk_ = trunk_ # note bids/rattrs do _not_ include corrupt btree nodes! def bids(self, *, leaves=False, path=False, depth=None): for r in self.leaves( path=path, depth=depth): if path: bid, rbyd, path_ = r else: bid, rbyd = r for rid, name in rbyd.rids(): bid_ = bid-(rbyd.weight-1) + rid if leaves: if path: yield (bid_, rbyd, rid, name, path_+[(bid_, rbyd, rid, name)]) else: yield bid_, rbyd, rid, name else: if path: yield (bid_, name, path_+[(bid_, rbyd, rid, name)]) else: yield bid_, name def rattrs(self, bid=None, tag=None, mask=None, *, leaves=False, path=False, depth=None): if bid is None: for r in self.leaves( path=path, depth=depth): if path: bid, rbyd, path_ = r else: bid, rbyd = r for rid, name in rbyd.rids(): bid_ = bid-(rbyd.weight-1) + rid for rattr in rbyd.rattrs(rid): if leaves: if path: yield (bid_, rbyd, rid, rattr, path_+[(bid_, rbyd, rid, name)]) else: yield bid_, rbyd, rid, rattr else: if path: yield (bid_, rattr, path_+[(bid_, rbyd, rid, name)]) else: yield bid_, rattr else: r = self.lookupleaf(bid, path=path, depth=depth) if path: bid, rbyd, rid, name, path_ = r else: bid, rbyd, rid, name = r if bid is None: return for rattr in rbyd.rattrs(rid, tag, mask): if leaves: if path: yield rbyd, rid, rattr, path_ else: yield rbyd, rid, rattr else: if path: yield rattr, path_ else: yield rattr # lookup by name def namelookupleaf(self, did, name, *, path=False, depth=None): rbyd = self.rbyd bid = 0 depth_ = 1 path_ = [] while True: # corrupt branch? if not rbyd: bid_ = bid+(rbyd.weight-1) if path: return bid_, rbyd, rbyd.weight-1, None, path_ else: return bid_, rbyd, rbyd.weight-1, None rid_, name_ = rbyd.namelookup(did, name) # keep track of path if path: path_.append((bid + rid_, rbyd, rid_, name_)) # find branch tag if there is one branch_ = rbyd.lookup(rid_, TAG_BRANCH, 0x3) # found another branch if branch_ is not None and ( not depth or depth_ < depth): block, trunk, cksum, _ = frombranch(branch_.data) rbyd = Rbyd.fetchck(self.bd, block, trunk, name_.weight, cksum) # update our bid bid += rid_ - (name_.weight-1) depth_ += 1 # found best match else: if path: return bid + rid_, rbyd, rid_, name_, path_ else: return bid + rid_, rbyd, rid_, name_ def namelookup(self, bid, *, path=False, depth=None): # just discard the rbyd info r = self.namelookupleaf(did, name, path=path, depth=depth) if path: bid, rbyd, rid, name, path_ = r else: bid, rbyd, rid, name = r if path: return bid, name, path_ else: return bid, name # tree renderer class TreeArt: # tree branches are an abstract thing for tree rendering class Branch(co.namedtuple('Branch', ['a', 'b', 'z', 'color'])): __slots__ = () def __new__(cls, a, b, z=0, color='b'): # a and b are context specific return super().__new__(cls, a, b, z, color) def __repr__(self): return '%s(%s, %s, %s, %s)' % ( self.__class__.__name__, self.a, self.b, self.z, self.color) # don't include color in branch comparisons, or else our tree # renderings can end up with inconsistent colors between runs def __eq__(self, other): return (self.a, self.b, self.z) == (other.a, other.b, other.z) def __ne__(self, other): return (self.a, self.b, self.z) != (other.a, other.b, other.z) def __hash__(self): return hash((self.a, self.b, self.z)) # also order by z first, which can be useful for reproducibly # prioritizing branches when simplifying trees def __lt__(self, other): return (self.z, self.a, self.b) < (other.z, other.a, other.b) def __le__(self, other): return (self.z, self.a, self.b) <= (other.z, other.a, other.b) def __gt__(self, other): return (self.z, self.a, self.b) > (other.z, other.a, other.b) def __ge__(self, other): return (self.z, self.a, self.b) >= (other.z, other.a, other.b) # apply a function to a/b while trying to avoid copies def map(self, filter_, map_=None): if map_ is None: filter_, map_ = None, filter_ a = self.a if filter_ is None or filter_(a): a = map_(a) b = self.b if filter_ is None or filter_(b): b = map_(b) if a != self.a or b != self.b: return self.__class__( a if a != self.a else self.a, b if b != self.b else self.b, self.z, self.color) else: return self def __init__(self, tree): self.tree = tree self.depth = max((t.z+1 for t in tree), default=0) if self.depth > 0: self.width = 2*self.depth + 2 else: self.width = 0 def __iter__(self): return iter(self.tree) def __bool__(self): return bool(self.tree) def __len__(self): return len(self.tree) # render an rbyd rbyd tree for debugging @classmethod def _fromrbydrtree(cls, rbyd, **args): trunks = co.defaultdict(lambda: (-1, 0)) alts = co.defaultdict(lambda: {}) for rid, rattr, path in rbyd.rattrs(path=True): # keep track of trunks/alts trunks[rattr.toff] = (rid, rattr.tag) for ralt in path: if ralt.followed: alts[ralt.toff] |= {'f': ralt.joff, 'c': ralt.color} else: alts[ralt.toff] |= {'nf': ralt.off, 'c': ralt.color} if args.get('tree_rbyd'): # treat unreachable alts as converging paths for j_, alt in alts.items(): if 'f' not in alt: alt['f'] = alt['nf'] elif 'nf' not in alt: alt['nf'] = alt['f'] else: # prune any alts with unreachable edges pruned = {} for j, alt in alts.items(): if 'f' not in alt: pruned[j] = alt['nf'] elif 'nf' not in alt: pruned[j] = alt['f'] for j in pruned.keys(): del alts[j] for j, alt in alts.items(): while alt['f'] in pruned: alt['f'] = pruned[alt['f']] while alt['nf'] in pruned: alt['nf'] = pruned[alt['nf']] # find the trunk and depth of each alt def rec_trunk(j): if j not in alts: return trunks[j] else: if 'nft' not in alts[j]: alts[j]['nft'] = rec_trunk(alts[j]['nf']) return alts[j]['nft'] for j in alts.keys(): rec_trunk(j) for j, alt in alts.items(): if alt['f'] in alts: alt['ft'] = alts[alt['f']]['nft'] else: alt['ft'] = trunks[alt['f']] def rec_height(j): if j not in alts: return 0 else: if 'h' not in alts[j]: alts[j]['h'] = max( rec_height(alts[j]['f']), rec_height(alts[j]['nf'])) + 1 return alts[j]['h'] for j in alts.keys(): rec_height(j) t_depth = max((alt['h']+1 for alt in alts.values()), default=0) # convert to more general tree representation tree = set() for j, alt in alts.items(): # note all non-trunk edges should be colored black tree.add(cls.Branch( alt['nft'], alt['nft'], t_depth-1 - alt['h'], alt['c'])) if alt['ft'] != alt['nft']: tree.add(cls.Branch( alt['nft'], alt['ft'], t_depth-1 - alt['h'], 'b')) return cls(tree) # render an rbyd btree tree for debugging @classmethod def _fromrbydbtree(cls, rbyd, **args): # for rbyds this is just a pointer to every rid tree = set() root = None for rid, name in rbyd.rids(): b = (rid, name.tag) if root is None: root = b tree.add(cls.Branch(root, b)) return cls(tree) # render an rbyd tree for debugging @classmethod def fromrbyd(cls, rbyd, **args): if args.get('tree_btree'): return cls._fromrbydbtree(rbyd, **args) else: return cls._fromrbydrtree(rbyd, **args) # render some nice ascii trees def repr(self, x, color=False): if self.depth == 0: return '' def branchrepr(tree, x, d, was): for t in tree: if t.z == d and t.b == x: if any(t.z == d and t.a == x for t in tree): return '+-', t.color, t.color elif any(t.z == d and x > min(t.a, t.b) and x < max(t.a, t.b) for t in tree): return '|-', t.color, t.color elif t.a < t.b: return '\'-', t.color, t.color else: return '.-', t.color, t.color for t in tree: if t.z == d and t.a == x: return '+ ', t.color, None for t in tree: if (t.z == d and x > min(t.a, t.b) and x < max(t.a, t.b)): return '| ', t.color, was if was: return '--', was, was return ' ', None, None trunk = [] was = None for d in range(self.depth): t, c, was = branchrepr(self.tree, x, d, was) trunk.append('%s%s%s%s' % ( '\x1b[33m' if color and c == 'y' else '\x1b[31m' if color and c == 'r' else '\x1b[1;30m' if color and c == 'b' else '', t, ('>' if was else ' ') if d == self.depth-1 else '', '\x1b[m' if color and c else '')) return '%s ' % ''.join(trunk) # some more renderers # render a btree rbyd tree for debugging @classmethod def _treeartfrombtreertree(cls, btree, *, depth=None, inner=False, **args): # precompute rbyd trees so we know the max depth at each layer # to nicely align trees rtrees = {} rdepths = {} for bid, rbyd, path in btree.traverse(path=True, depth=depth): if not rbyd: continue rtree = cls.fromrbyd(rbyd, **args) rtrees[rbyd] = rtree rdepths[len(path)] = max(rdepths.get(len(path), 0), rtree.depth) # map rbyd branches into our btree space tree = set() for bid, rbyd, path in btree.traverse(path=True, depth=depth): if not rbyd: continue # yes we can find new rbyds if disk is being mutated, just # ignore these if rbyd not in rtrees: continue rtree = rtrees[rbyd] rz = max((t.z+1 for t in rtree), default=0) d = sum(rdepths[d]+1 for d in range(len(path))) # map into our btree space for t in rtree: # note we adjust our bid to be left-leaning, this allows # a global order and makes tree rendering quite a bit easier a_rid, a_tag = t.a b_rid, b_tag = t.b _, (_, a_w, _) = rbyd.lookupnext(a_rid) _, (_, b_w, _) = rbyd.lookupnext(b_rid) tree.add(cls.Branch( (bid-(rbyd.weight-1)+a_rid-(a_w-1), len(path), a_tag), (bid-(rbyd.weight-1)+b_rid-(b_w-1), len(path), b_tag), d + rdepths[len(path)]-rz + t.z, t.color)) # connect rbyd branches to rbyd roots if path: l_bid, l_rbyd, l_rid, l_name = path[-1] l_branch = l_rbyd.lookup(l_rid, TAG_BRANCH, 0x3) if rtree: r_rid, r_tag = min(rtree, key=lambda t: t.z).a _, (_, r_w, _) = rbyd.lookupnext(r_rid) else: r_rid, (r_tag, r_w, _) = rbyd.lookupnext(-1) tree.add(cls.Branch( (l_bid-(l_name.weight-1), len(path)-1, l_branch.tag), (bid-(rbyd.weight-1)+r_rid-(r_w-1), len(path), r_tag), d-1)) # remap branches to leaves if we aren't showing inner branches if not inner: # step through each btree layer backwards b_depth = max((t.a[1]+1 for t in tree), default=0) for d in reversed(range(b_depth-1)): # find bid ranges at this level bids = set() for t in tree: if t.b[1] == d: bids.add(t.b[0]) bids = sorted(bids) # find the best root for each bid range roots = {} for i in range(len(bids)): for t in tree: if (t.a[1] > d and t.a[0] >= bids[i] and (i == len(bids)-1 or t.a[0] < bids[i+1]) and (bids[i] not in roots or t < roots[bids[i]])): roots[bids[i]] = t # remap branches to leaf-roots tree = {t.map( lambda x: x[1] == d and x[0] in roots, lambda x: roots[x[0]].a) for t in tree} return cls(tree) # render a btree btree tree for debugging @classmethod def _treeartfrombtreebtree(cls, btree, *, depth=None, inner=False, **args): # find all branches tree = set() root = None branches = {} for bid, name, path in btree.bids( path=True, depth=depth): # create branch for each jump in path # # note we adjust our bid to be left-leaning, this allows # a global order and makes tree rendering quite a bit easier a = root for d, (bid_, rbyd_, rid_, name_) in enumerate(path): # map into our btree space bid__ = bid_-(name_.weight-1) b = (bid__, d, name_.tag) # remap branches to leaves if we aren't showing inner # branches if not inner: if b not in branches: bid_, rbyd_, rid_, name_ = path[-1] bid__ = bid_-(name_.weight-1) branches[b] = (bid__, len(path)-1, name_.tag) b = branches[b] # render the root path on first rid, this is arbitrary if root is None: root, a = b, b tree.add(cls.Branch(a, b, d)) a = b return cls(tree) # render a btree tree for debugging @classmethod def treeartfrombtree(cls, btree, **args): if args.get('tree_btree'): return cls._frombtreebtree(btree, **args) else: return cls._frombtreertree(btree, **args) TreeArt._frombtreertree = _treeartfrombtreertree TreeArt._frombtreebtree = _treeartfrombtreebtree TreeArt.frombtree = treeartfrombtree def main(disk, roots=None, *, trunk=None, block_size=None, block_count=None, quiet=False, color='auto', **args): # figure out what color should be if color == 'auto': color = sys.stdout.isatty() elif color == 'always': color = True else: color = False # is bd geometry specified? if isinstance(block_size, tuple): block_size, block_count_ = block_size if block_count is None: block_count = block_count_ # flatten roots, default to block 0 roots = list(it.chain.from_iterable(roots)) if roots else [0] # roots may also encode trunks roots, trunk = ( [block[0] if isinstance(block, tuple) else block for block in roots], trunk if trunk is not None else ft.reduce( lambda x, y: y, (block[1] for block in roots if isinstance(block, tuple)), None)) # we seek around a bunch, so just keep the disk open with open(disk, 'rb') as f: # if block_size is omitted, assume the block device is one big block if block_size is None: f.seek(0, os.SEEK_END) block_size = f.tell() # fetch the btree bd = Bd(f, block_size, block_count) btree = Btree.fetch(bd, roots, trunk) # print some information about the btree if not quiet: print('btree %s w%d, rev %08x, cksum %08x' % ( btree.addr(), btree.weight, btree.rev, btree.cksum)) # precompute tree renderings t_width = 0 if (args.get('tree') or args.get('tree_rbyd') or args.get('tree_btree')): treeart = TreeArt.frombtree(btree, **args) t_width = treeart.width # dynamically size the id field w_width = mt.ceil(mt.log10(max(1, btree.weight)+1)) # prbyd keeps track of the last rendered rbyd, we update # in dbg_branch to always print interleaved addresses prbyd = None def dbg_branch(d, bid, rbyd, rid, name): nonlocal prbyd # show human-readable representation for rattr in rbyd.rattrs(rid): print('%10s %s%*s %-*s %s' % ( '%04x.%04x:' % (rbyd.block, rbyd.trunk) if prbyd is None or rbyd != prbyd else '', treeart.repr((bid-(name.weight-1), d, rattr.tag), color) if args.get('tree') or args.get('tree_rbyd') or args.get('tree_btree') else '', 2*w_width+1, '%d-%d' % (bid-(rattr.weight-1), bid) if rattr.weight > 1 else bid if rattr.weight > 0 else '', 21+w_width, rattr.repr(), next(xxd(rattr.data, 8), '') if not args.get('raw') and not args.get('no_truncate') else '')) prbyd = rbyd # show on-disk encoding of tags/data if args.get('raw'): for o, line in enumerate(xxd(rattr.tdata)): print('%9s: %*s%*s %s' % ( '%04x' % (rattr.toff + o*16), t_width, '', 2*w_width+1, '', line)) if args.get('raw') or args.get('no_truncate'): for o, line in enumerate(xxd(rattr.data)): print('%9s: %*s%*s %s' % ( '%04x' % (rattr.off + o*16), t_width, '', 2*w_width+1, '', line)) # traverse and print entries ppath = [] corrupted = False for bid, rbyd, path in btree.leaves( path=True, depth=args.get('depth')): # print inner branches if requested if args.get('inner') and not quiet: for d, (bid_, rbyd_, rid_, name_) in pathdelta( path, ppath): dbg_branch(d, bid_, rbyd_, rid_, name_) ppath = path # corrupted? try to keep printing the tree if not rbyd: if not quiet: print('%s%04x.%04x: %*s%s%s' % ( '\x1b[31m' if color else '', rbyd.block, rbyd.trunk, t_width, '', '(corrupted rbyd %s)' % rbyd.addr(), '\x1b[m' if color else '')) prbyd = None corrupted = True continue if not quiet: for rid, name in rbyd.rids(): bid_ = bid-(rbyd.weight-1) + rid # show the leaf entry/branch dbg_branch(len(path), bid_, rbyd, rid, name) if args.get('error_on_corrupt') and corrupted: sys.exit(2) if __name__ == "__main__": import argparse import sys parser = argparse.ArgumentParser( description="Debug rbyd B-trees.", allow_abbrev=False) parser.add_argument( 'disk', help="File containing the block device.") parser.add_argument( 'roots', nargs='*', type=rbydaddr, help="Block address of the roots of the tree.") parser.add_argument( '--trunk', type=lambda x: int(x, 0), help="Use this offset as the trunk of the tree.") parser.add_argument( '-b', '--block-size', type=bdgeom, help="Block size/geometry in bytes. Accepts x.") parser.add_argument( '--block-count', type=lambda x: int(x, 0), help="Block count in blocks.") parser.add_argument( '-q', '--quiet', action='store_true', help="Don't show anything, useful when checking for errors.") parser.add_argument( '--color', choices=['never', 'always', 'auto'], default='auto', help="When to use terminal colors. Defaults to 'auto'.") parser.add_argument( '-x', '--raw', action='store_true', help="Show the raw data including tag encodings.") parser.add_argument( '-T', '--no-truncate', action='store_true', help="Don't truncate, show the full contents.") parser.add_argument( '-t', '--tree', action='store_true', help="Show the rbyd tree.") parser.add_argument( '-R', '--tree-rbyd', action='store_true', help="Show the full rbyd tree.") parser.add_argument( '-B', '--tree-btree', action='store_true', help="Show a simplified btree tree.") parser.add_argument( '-i', '--inner', action='store_true', help="Show inner branches.") parser.add_argument( '-z', '--depth', nargs='?', type=lambda x: int(x, 0), const=0, help="Depth of the btree to show.") parser.add_argument( '-e', '--error-on-corrupt', action='store_true', help="Error if B-tree is corrupt.") sys.exit(main(**{k: v for k, v in vars(parser.parse_intermixed_args()).items() if v is not None}))