#!/usr/bin/env python3

import bisect
import itertools as it
import math as m
import os
import struct


# ANSI SGR color codes, cycled through when coloring id lifetimes
COLORS = [
    '34', # blue
    '31', # red
    '32', # green
    '35', # purple
    '33', # yellow
    '36', # cyan
]

# tag values, compared against masked tags when rendering them
TAG_UNREACHABLE = 0x0002
TAG_NAME = 0x1000
TAG_BNAME = 0x1000
TAG_REG = 0x1010
TAG_DIR = 0x1020
TAG_STRUCT = 0x3000
TAG_INLINED = 0x3000
TAG_BLOCK = 0x3100
TAG_BRANCH = 0x3200
TAG_BTREE = 0x3300
TAG_UATTR = 0x4000
TAG_ALT = 0x0008
TAG_CRC = 0x0004
TAG_FCRC = 0x1004


def blocklim(s):
    """Parse a block argument of the form ``BLOCK`` or ``BLOCK.N``.

    Without a ``.`` the string is handed to ``int(s, 0)`` and a single int
    is returned. With a ``.`` the string is split once and a 2-tuple of
    ints is returned; a leading ``0x``/``0o``/``0b`` prefix (either case)
    selects the base used for *both* halves, otherwise both halves are
    decimal.

    NOTE(review): going by the name, the second number is presumably a
    limit within the block; the code that consumes it is not visible here.

    Raises:
        ValueError: if either number is malformed for the selected base.
    """
    if '.' not in s:
        return int(s, 0)

    s = s.strip()
    # the prefix applies to the whole "block.n" string, so strip it once
    # and reuse its base for both halves
    base = {'0x': 16, '0o': 8, '0b': 2}.get(s[:2].lower())
    if base is None:
        base = 10
    else:
        s = s[2:]

    block, lim = s.split('.', 1)
    return int(block, base), int(lim, base)


def crc32c(data, crc=0):
    """Compute a CRC-32C of ``data``, one bit at a time.

    ``crc`` is the result of a previous call, which lets a checksum be
    continued across several buffers; the default of 0 starts a new one.
    """
    crc ^= 0xffffffff
    for b in data:
        crc ^= b
        for _ in range(8):
            # shift out one bit, folding in the reflected polynomial
            # whenever the bit shifted out was set
            crc = (crc >> 1) ^ ((crc & 1) * 0x82f63b78)
    return 0xffffffff ^ crc


# NOTE(review): the source text from here up to the tail of xxd() was lost
# before this file reached review (everything between a '<' and a later '>'
# appears to have been stripped). The surviving remnant is kept verbatim
# below. The helpers the rest of the file calls (fromtag, popc, xxd, ...)
# must be restored from the original source, not guessed.
#
# def fromle16(data): if len(data) < 2: return 0 return struct.unpack('= ' ' and b <= '~' else '.'
# NOTE(review): this physical line of the collapsed source opens with the
# orphaned tail of xxd(), whose head was lost; kept verbatim:
#
#     for b in map(chr, data[i:i+width])))


def tagrepr(tag, id, size, off=None):
    """Render a tag as a short human-readable string.

    Args:
        tag: The tag value; its bits select which rendering is used.
        id: The id attached to the tag. For alt tags this is printed as
            the alt's weight (``w%d``).
        size: The size attached to the tag. For alt tags this is the
            distance the alt jumps backwards.
        off: Offset of the tag itself. If given, an alt's jump is shown as
            an absolute target (``off-size``, wrapped to 32 bits);
            otherwise it is shown as a relative ``-size``.

    Returns:
        The string representation of the tag.
    """
    exact = tag & 0xfffe
    rm = 'rm' if tag & 0x2 else ''
    subtype = (tag & 0x0ff0) >> 4

    if exact == TAG_UNREACHABLE:
        return 'unreachable id%d%s' % (
            id,
            ' %d' % size if size else '')
    elif (tag & 0xf00c) == TAG_NAME:
        return '%s%s id%d %d' % (
            rm,
            'bname' if exact == TAG_BNAME
            else 'reg' if exact == TAG_REG
            else 'dir' if exact == TAG_DIR
            else 'name 0x%02x' % subtype,
            id,
            size)
    elif (tag & 0xf00c) == TAG_STRUCT:
        return '%s%s id%d %d' % (
            rm,
            'inlined' if exact == TAG_INLINED
            else 'block' if exact == TAG_BLOCK
            else 'branch' if exact == TAG_BRANCH
            else 'btree' if exact == TAG_BTREE
            else 'struct 0x%02x' % subtype,
            id,
            size)
    elif (tag & 0xf00c) == TAG_UATTR:
        return '%suattr 0x%02x%s%s' % (
            rm,
            subtype,
            ' id%d' % id if id != -1 else '',
            # a removal only shows its size when it is non-zero
            ' %d' % size if not tag & 0x2 or size else '')
    elif (tag & 0xf00e) == TAG_CRC:
        return 'crc%x%s %d' % (
            1 if tag & 0x10 else 0,
            ' 0x%02x' % id if id != -1 else '',
            size)
    elif exact == TAG_FCRC:
        return 'fcrc%s %d' % (
            ' 0x%02x' % id if id != -1 else '',
            size)
    elif tag & 0x8:
        # alt pointer: without a known offset we can only show the jump
        # relative to the tag. This previously formatted `off` (None) with
        # %d, which raises TypeError.
        if off is not None:
            target = '0x%x' % (0xffffffff & (off-size))
        else:
            target = '-%d' % size
        return 'alt%s%s 0x%x w%d %s' % (
            'r' if tag & 0x2 else 'b',
            'gt' if tag & 0x4 else 'le',
            tag & 0xfff0,
            id,
            target)
    else:
        return '0x%04x id%d %d' % (tag, id, size)


# NOTE(review): show_log() begins on this physical line of the collapsed
# source and continues, mid-statement, on the following ones. Later parts of
# it have lost text, so it cannot be safely reformatted as a unit; the part
# that sat on this line is kept verbatim (wrapped only for width):
#
# def show_log(block_size, data, rev, off, *, color=False, **args): crc = crc32c(data[0:4])
# # preprocess jumps if args.get('jumps'): jumps = [] j_ = 4
# while j_ < (block_size if args.get('all') else off): j = j_
# v, tag, id, size, delta = fromtag(data[j_:]) j_ += delta if not tag & 0x8: j_ += size
# if tag & 0x8: # figure out which alt color if tag & 0x2:
# _, ntag, _, _, _ = fromtag(data[j_:]) if ntag & 0x2: jumps.append((j, j-size, 0, 'y'))
# else: jumps.append((j, j-size, 0, 'r')) else: jumps.append((j, j-size, 0, 'b'))
# # figure out x-offsets to avoid collisions between jumps for j
in range(len(jumps)): a, b, _, c = jumps[j] x = 0 while any( max(a, b) >= min(a_, b_) and max(a_, b_) >= min(a, b) and x == x_ for a_, b_, x_, _ in jumps[:j]): x += 1 jumps[j] = a, b, x, c def jumprepr(j): # render jumps chars = {} for a, b, x, c in jumps: c_start = ( '\x1b[33m' if color and c == 'y' else '\x1b[31m' if color and c == 'r' else '\x1b[90m' if color else '') c_stop = '\x1b[m' if color else '' if j == a: for x_ in range(2*x+1): chars[x_] = '%s-%s' % (c_start, c_stop) chars[2*x+1] = '%s\'%s' % (c_start, c_stop) elif j == b: for x_ in range(2*x+1): chars[x_] = '%s-%s' % (c_start, c_stop) chars[2*x+1] = '%s.%s' % (c_start, c_stop) chars[0] = '%s<%s' % (c_start, c_stop) elif j >= min(a, b) and j <= max(a, b): chars[2*x+1] = '%s|%s' % (c_start, c_stop) return ''.join(chars.get(x, ' ') for x in range(max(chars.keys(), default=0)+1)) # preprocess lifetimes lifetime_width = 0 if args.get('lifetimes'): class Lifetime: color_i = 0 def __init__(self, j): self.origin = j self.tags = set() self.color = COLORS[self.__class__.color_i] self.__class__.color_i = ( self.__class__.color_i + 1) % len(COLORS) def add(self, j): self.tags.add(j) def __bool__(self): return bool(self.tags) # first figure out where each id comes from weights = [] lifetimes = [] def index(weights, id): for i, w in enumerate(weights): if id < w: return i, id id -= w return len(weights), 0 checkpoint_js = [0] checkpoints = [([], [], set(), set(), set())] def checkpoint(j, weights, lifetimes, grows, shrinks, tags): checkpoint_js.append(j) checkpoints.append(( weights.copy(), lifetimes.copy(), grows, shrinks, tags)) lower_, upper_ = 0, 0 weight_ = 0 wastrunk = False j_ = 4 while j_ < (block_size if args.get('all') else off): j = j_ v, tag, id, size, delta = fromtag(data[j_:]) j_ += delta if not tag & 0x8: j_ += size # find trunk if not wastrunk and (tag & 0xc) != 0x4: lower_, upper_ = 0, 0 wastrunk = not not tag & 0x8 # keep track of weight if tag & 0x8: if tag & 0x4: upper_ += id else: lower_ += id 
elif (tag & 0xc) == 0x0: delta = (lower_+upper_) - weight_ if not tag & 0x2: delta += id+1-lower_ weight_ += delta # note we ignore out-of-bounds here for debugging if delta > 0: # grow lifetimes i, id_ = index(weights, lower_) if id_ > 0: weights[i:i+1] = [id_, delta, weights[i]-id_] lifetimes[i:i+1] = [ lifetimes[i], Lifetime(j), lifetimes[i]] else: weights[i:i] = [delta] lifetimes[i:i] = [Lifetime(j)] checkpoint(j, weights, lifetimes, {i}, set(), {i}) elif delta < 0: # shrink lifetimes i, id_ = index(weights, lower_) delta_ = -delta weights_ = weights.copy() lifetimes_ = lifetimes.copy() shrinks = set() while delta_ > 0 and i < len(weights_): if id_ > 0: delta__ = min(delta_, weights_[i]-id_) delta_ -= delta__ weights_[i] -= delta__ i += 1 id_ = 0 elif weights_[i] > delta_: weights_[i] -= delta_ size_ = 0 else: delta_ -= weights_[i] weights_[i:i+1] = [] lifetimes_[i:i+1] = [] shrinks.add(i + len(shrinks)) checkpoint(j, weights, lifetimes, set(), shrinks, {i}) weights = weights_ lifetimes = lifetimes_ if not tag & 0x2: # attach tag to lifetime i, id_ = index(weights, lower_) if i < len(weights): lifetimes[i].add(j) if delta == 0: checkpoint(j, weights, lifetimes, set(), set(), {i}) lifetime_width = 2*max(( sum(1 for lifetime in lifetimes if lifetime) for _, lifetimes, _, _, _ in checkpoints), default=0) def lifetimerepr(j): x = bisect.bisect(checkpoint_js, j)-1 j_ = checkpoint_js[x] weights, lifetimes, grows, shrinks, tags = checkpoints[x] reprs = [] colors = [] was = None for i, (w, lifetime) in enumerate(zip(weights, lifetimes)): # skip lifetimes with no tags and shrinks if not lifetime or (j != j_ and i in shrinks): if i in grows or i in shrinks or i in tags: tags = tags.copy() tags.add(i+1) continue if j == j_ and i in grows: reprs.append('.') was = 'grow' elif j == j_ and i in shrinks: reprs.append('\'') was = 'shrink' elif j == j_ and i in tags: reprs.append('* ') elif was == 'grow': reprs.append('\\ ') elif was == 'shrink': reprs.append('/ ') else: 
reprs.append('| ') colors.append(lifetime.color) return '%s%*s' % ( ''.join('%s%s%s' % ( '\x1b[%sm' % c if color else '', r, '\x1b[m' if color else '') for r, c in zip(reprs, colors)), lifetime_width - sum(len(r) for r in reprs), '') # print header print('%-8s %*s%-22s %s' % ( 'off', lifetime_width, '', 'tag', 'data (truncated)' if not args.get('no_truncate') else '')) # print revision count if args.get('raw'): print('%8s: %s' % ('%04x' % 0, next(xxd(data[0:4])))) # print tags j_ = 4 while j_ < (block_size if args.get('all') else off): notes = [] j = j_ v, tag, id, size, delta = fromtag(data[j_:]) if v != (popc(crc) & 1): notes.append('v!=%x' % (popc(crc) & 1)) tag &= ~1 crc = crc32c(data[j_:j_+delta], crc) j_ += delta if not tag & 0x8: if (tag & 0xf00f) != TAG_CRC: crc = crc32c(data[j_:j_+size], crc) # found a crc? else: crc_, = struct.unpack('= off else '', j, '\x1b[m' if color and j >= off else '', lifetimerepr(j) if args.get('lifetimes') else '', '\x1b[90m' if color and j >= off else '', '%-22s%s' % ( tagrepr(tag, id, size, j), ' %s' % next(xxd( data[j+delta:j+delta+min(size, 8)], 8), '') if not args.get('no_truncate') and not tag & 0x8 else ''), '\x1b[m' if color and j >= off else '', ' (%s)' % ', '.join(notes) if notes else ' %s' % jumprepr(j) if args.get('jumps') else '')) if args.get('raw'): # show on-disk encoding of tags for o, line in enumerate(xxd(data[j:j+delta])): print('%s%8s: %s%s' % ( '\x1b[90m' if color and j >= off else '', '%04x' % (j + o*16), line, '\x1b[m' if color and j >= off else '')) # show in-device representation, including some extra # crc/parity info if args.get('device'): print('%s%8s %*s%-47s %08x %x%s' % ( '\x1b[90m' if color and j >= off else '', '', lifetime_width, '', '%-22s%s' % ( '%04x %08x %07x' % (tag, 0xffffffff & id, size), ' %s' % ' '.join( '%08x' % struct.unpack('= off else '')) if not tag & 0x8: # show on-disk encoding of data if args.get('raw') or args.get('no_truncate'): for o, line in 
enumerate(xxd(data[j+delta:j+delta+size])): print('%s%8s: %s%s' % ( '\x1b[90m' if color and j >= off else '', '%04x' % (j+delta + o*16), line, '\x1b[m' if color and j >= off else '')) def show_tree(block_size, data, rev, trunk, weight, *, color=False, **args): if trunk is None: return # lookup a tag, returning also the search path for decoration # purposes def lookup(tag, id): lower = -1 upper = weight path = [] # descend down tree j = trunk while True: _, alt, weight_, jump, delta = fromtag(data[j:]) # found an alt? if alt & 0x8: # follow? if ((id, tag & ~0xf) > (upper-weight_-1, alt & ~0xf) if alt & 0x4 else ((id, tag & ~0xf) <= (lower+weight_, alt & ~0xf))): lower += upper-lower-1-weight_ if alt & 0x4 else 0 upper -= upper-lower-1-weight_ if not alt & 0x4 else 0 j = j - jump if args.get('tree'): # figure out which color if alt & 0x2: _, nalt, _, _, _ = fromtag(data[j+jump+delta:]) if nalt & 0x2: path.append((j+jump, j, 'y')) else: path.append((j+jump, j, 'r')) else: path.append((j+jump, j, 'b')) # stay on path else: lower += weight_ if not alt & 0x4 else 0 upper -= weight_ if alt & 0x4 else 0 j = j + delta if args.get('tree'): # figure out which color if alt & 0x2: _, nalt, _, _, _ = fromtag(data[j:]) if nalt & 0x2: path.append((j-delta, j, 'y')) else: path.append((j-delta, j, 'r')) else: path.append((j-delta, j, 'b')) # found tag else: tag_ = alt id_ = upper-1 w_ = id_-lower done = (id_, tag_) < (id, tag) or tag_ & 2 return done, tag_, id_, w_, j, delta, jump, path # precompute tree tree_width = 0 if args.get('tree'): tags = [] paths = {} tag, id = 0, -1 while True: done, tag, id, w, j, delta, size, path = lookup(tag+0x10, id) # found end of tree? 
if done: break tags.append((j, tag, id)) for x, (a, b, c) in enumerate(path): paths[a, b, x] = c # align paths to nearest tag tags.sort() paths = {( tags[bisect.bisect_left(tags, (a, 0, -1), hi=len(tags)-1)], tags[bisect.bisect_left(tags, (b, 0, -1), hi=len(tags)-1)], x): c for (a, b, x), c in paths.items()} # also find the maximum depth depth = max((x+1 for _, _, x in paths.keys()), default=0) if depth > 0: tree_width = 2*depth + 2 def treerepr(j): if depth == 0: return '' _, tag, id = tags[bisect.bisect_left( tags, (j, 0, -1), hi=len(tags)-1)] def c_start(c): return ('\x1b[33m' if color and c == 'y' else '\x1b[31m' if color and c == 'r' else '\x1b[90m' if color else '') def c_stop(c): return '\x1b[m' if color else '' path = [] seen = None for x in range(depth): if any(x == x_ and tag == a_tag and id == a_id for (_, a_tag, a_id), _, x_ in paths.keys()): c = next(c for ((_, a_tag, a_id), _, x_), c in paths.items() if x == x_ and tag == a_tag and id == a_id) path.append('%s+%s' % (c_start(c), c_stop(c))) elif any(x == x_ and tag == b_tag and id == b_id for _, (_, b_tag, b_id), x_ in paths.keys()): a_tag, a_id, c = next((a_tag, a_id, c) for ((_, a_tag, a_id), (_, b_tag, b_id), x_), c in paths.items() if x == x_ and tag == b_tag and id == b_id) if (a_id, a_tag) < (id, tag): path.append('%s\'%s' % (c_start(c), c_stop(c))) else: path.append('%s.%s' % (c_start(c), c_stop(c))) elif any(x == x_ and (id, tag) >= min((a_id, a_tag), (b_id, b_tag)) and (id, tag) <= max((a_id, a_tag), (b_id, b_tag)) for (_, a_tag, a_id), (_, b_tag, b_id), x_ in paths.keys()): c = next(c for ((_, a_tag, a_id), (_, b_tag, b_id), x_), c in paths.items() if x == x_ and (id, tag) >= min((a_id, a_tag), (b_id, b_tag)) and (id, tag) <= max((a_id, a_tag), (b_id, b_tag))) path.append('%s|%s' % (c_start(c), c_stop(c))) elif seen: path.append('%s-%s' % (c_start(seen), c_stop(seen))) else: path.append(' ') if any(x == x_ and tag == b_tag and id == b_id for _, (_, b_tag, b_id), x_ in paths.keys()): c = 
next(c for (_, (_, b_tag, b_id), x_), c in paths.items() if x == x_ and tag == b_tag and id == b_id) seen = c if seen and x == depth-1: path.append('%s->%s' % (c_start(seen), c_stop(seen))) elif seen: path.append('%s-%s' % (c_start(seen), c_stop(seen))) else: path.append(' ') return ' %s' % ''.join(path) # print header w_width = 2*m.ceil(m.log10(max(1, weight)+1))+1 print('%-8s %*s%-*s %-22s %s' % ( 'off', tree_width, '', w_width, 'ids', 'tag', 'data (truncated)' if not args.get('no_truncate') else '')) tag, id = 0, -1 while True: done, tag, id, w, j, delta, size, path = lookup(tag+0x10, id) # found end of tree? if done: break # show human-readable tag representation print('%08x:%s %*s %-57s' % ( j, treerepr(j) if args.get('tree') else '', w_width, '%d-%d' % (id-(w-1), id) if w > 1 else id if w > 0 else '', '%-22s%s' % ( tagrepr(tag, id, size, j), ' %s' % next(xxd( data[j+delta:j+delta+min(size, 8)], 8), '') if not args.get('no_truncate') and not tag & 0x8 else ''))) # show in-device representation if args.get('device'): print('%8s %*s%*s %s' % ( '', tree_width, '', w_width, '', '%-22s%s' % ( '%04x %08x %07x' % (tag, 0xffffffff & id, size), ' %s' % ' '.join( '%08x' % struct.unpack(' 1 else '')) if args.get('log'): show_log(block_size, data, rev, off, color=color, **args) else: show_tree(block_size, data, rev, trunk, weight, color=color, **args) if args.get('error_on_corrupt') and off == 0: sys.exit(2) if __name__ == "__main__": import argparse import sys parser = argparse.ArgumentParser( description="Debug rbyd metadata.", allow_abbrev=False) parser.add_argument( 'disk', help="File containing the block device.") parser.add_argument( 'block1', nargs='?', type=blocklim, help="Block address of the first metadata block.") parser.add_argument( 'block2', nargs='?', type=blocklim, help="Block address of the second metadata block.") parser.add_argument( '-B', '--block-size', type=lambda x: int(x, 0), help="Block size in bytes.") parser.add_argument( '-L', '--limit', 
type=lambda x: int(x, 0), help="Use this offset as the rbyd limit.") parser.add_argument( '--trunk', type=lambda x: int(x, 0), help="Use this offset as the trunk of the tree.") parser.add_argument( '--color', choices=['never', 'always', 'auto'], default='auto', help="When to use terminal colors. Defaults to 'auto'.") parser.add_argument( '-a', '--all', action='store_true', help="Don't stop parsing on bad commits.") parser.add_argument( '-l', '--log', action='store_true', help="Show the raw tags as they appear in the log.") parser.add_argument( '-r', '--raw', action='store_true', help="Show the raw data including tag encodings.") parser.add_argument( '-x', '--device', action='store_true', help="Show the device-side representation of tags.") parser.add_argument( '-T', '--no-truncate', action='store_true', help="Don't truncate, show the full contents.") parser.add_argument( '-t', '--tree', action='store_true', help="Show the rbyd tree.") parser.add_argument( '-j', '--jumps', action='store_true', help="Show alt pointer jumps in the margin.") parser.add_argument( '-g', '--lifetimes', action='store_true', help="Show inserts/deletes of ids in the margin.") parser.add_argument( '-e', '--error-on-corrupt', action='store_true', help="Error if no valid commit is found.") sys.exit(main(**{k: v for k, v in vars(parser.parse_intermixed_args()).items() if v is not None}))