Initial, very, very rough implementation of rbyd range deletion

Tree deletion is such a pain. It always seems like an easy addition to the core algorithm but always comes with problems. The initial plan for deletes was to iterate through all tags, tombstone, and then adjust weights as needed. This accomplishes deletes with little change to the rbyd algorithm, but adds a complex traversal inside the commit logic. Doable in one commit, but complex. It also risks weird unintuitive corner cases since the cost of deletion grows with the number of tags being deleted (O(m log n)). But this rbyd data structure is a tree, so in theory it's possible to delete a whole range of tags in a single O(log n) operation. --- This is a proof-of-concept range deletion algorithm for rbyd trees. Note, this does not preserve rbyd's balancing properties! But it is no worse than tombstoning. This is acceptable for littlefs as any unbalanced trees will be rebalanced during compaction. The idea is to follow the same underlying dhara algorithm, where we follow a search path and save any alt pointers not taken, but we follow both search paths that form the outside of the range, and only keep outside edges. For example, a tree: .-------o-------. | | .---o---. .---o---. | | | | .-o-. .-o-. .-o-. .-o-. | | | | | | | | a b c d e f g h To delete the range d-e, we would search for d, and search for e: ********o******** * * .---***** *****---. | * * | .-o-. .-*** ***-. .-o-. | | | * * | | | a b c d e f g h And keep the outside edges: .--- ---. | | .-o-. .- -. .-o-. | | | | | | a b c f g h But how do we combine the outside edges? The simpler option is to do both searches seperately, one after the other. This would end up with a tree like this: .---------o | | .-o-. .---o | | | | a b c o---------. | | o---. .-o-. | | | | _ f g h But this horribly throws off the balance of our tree! It's worse than tombstoning, and gets worse with more tags. An alternative strategy, which is used here, is to alternate edges as we descend down the tree. This unfortunately is more complex, and requires ~2x the RAM, but better preserves the balance of our tree. It isn't perfect, because we lose color information, but we can leave that up to compaction: .---------o | | .-o-. o---------. | | | | a b .---o .-o-. | | | | c o---. g h | | _ f I also hope this can be merged into lfs_rbyd_append, deduplicating the entire core rbyd append algorithm.
2022-12-31 00:52:12 -06:00
parent cde3ba4cd8
commit ca710b5a29
3 changed files with 751 additions and 46 deletions
--- a/scripts/dbgrbyd.py
+++ b/scripts/dbgrbyd.py
@@ -54,16 +54,15 @@ def tagrepr(tag, size, off=None):
    type2 = (tag >> 7) & 0xff
    id = (tag >> 15) & 0xffff

-    if (type1 & 0x7e) == 0x40:
-        return '%screate x%02x id%d%s' % (
-            '~' if type1 & 0x1 else '',
-            type2,
+    if (type1 & 0x7f) == 0x40:
+        return 'create%s id%d%s' % (
+            'reg' if type2 == 1
+                else ' x%02x' % type2,
            id,
            ' %d' % size if not type1 & 0x1 else '')
-    elif (type1 & 0x7e) == 0x48:
-        return '%sdelete x%02x id%d%s' % (
-            '~' if type1 & 0x1 else '',
-            type2,
+    elif (type1 & 0x7f) == 0x41:
+        return 'delete%s id%d%s' % (
+            ' x%02x' % type2 if type2 else '',
            id,
            ' %d' % size if not type1 & 0x1 else '')
    elif (type1 & 0x7e) == 0x50:
@@ -245,10 +244,11 @@ def main(disk, block_size, block1, block2=None, *,
    # print tags
    if args.get('rbyd'):
        alts = []
-    if args.get('ids'):
+    if args.get('lifetimes'):
        count = 0
        ids = []
        ids_i = 0
+        deleted_id = ''
    j = 4
    while j < (block_size if args.get('all') else off):
        notes = []
@@ -264,15 +264,15 @@ def main(disk, block_size, block1, block2=None, *,
            if (tag & 0x7e) != 0x2:
                crc = crc32c(data[j:j+size], crc)
                # adjust count
-                if args.get('ids'):
+                if args.get('lifetimes'):
                    if (tag & 0x7f) == 0x40:
                        count += 1
                        ids.insert(((tag >> 15) & 0xffff)-1,
                            COLORS[ids_i % len(COLORS)])
                        ids_i += 1
-                    elif (tag & 0x7f) == 0x48:
+                    elif (tag & 0x7f) == 0x41:
                        count -= 1
-                        ids.pop(((tag >> 15) & 0xffff)-1)
+                        deleted_id = ids.pop(((tag >> 15) & 0xffff)-1)
            # found a crc?
            else:
                crc_, = struct.unpack('<I', data[j:j+4].ljust(4, b'\0'))
@@ -290,7 +290,7 @@ def main(disk, block_size, block1, block2=None, *,
                        line,
                        '\x1b[m' if color and j_ >= off else ''))

-        if not args.get('in_tree') or (tag & 0x7) == 0:
+        if not args.get('in_tree') or (tag & 0x6) == 0:
            # show human-readable tag representation
            print('%s%08x: %-57s%s%s' % (
                '\x1b[90m' if color and j_ >= off else '',
@@ -315,23 +315,35 @@ def main(disk, block_size, block1, block2=None, *,
                    if args.get('rbyd') and (tag & 0x7) == 0
                else '  %s' % jumprepr(j_)
                    if args.get('jumps')
-                else '  %s' % ''.join(
-                    '%s%s%s' % (
-                        '\x1b[%sm' % ids[id] if color else '',
-                        '.' if (tag & 0x7f) == 0x40
-                            and id == ((tag >> 15) & 0xffff)-1
-                        else '\'' if (tag & 0x7f) == 0x48
-                            and id == ((tag >> 15) & 0xffff)-1
-                        else '* ' if not tag & 0x4
-                            and id == ((tag >> 15) & 0xffff)-1
-                        else '\ ' if (tag & 0x7f) == 0x40
-                            and id > ((tag >> 15) & 0xffff)-1
-                        else '/ ' if (tag & 0x7f) == 0x48
-                            and id > ((tag >> 15) & 0xffff)-1
-                        else '| ',
+                else '  %s%s' % (
+                    ''.join(
+                        '%s%s%s%s' % (
+                            '%s\'%s' % (
+                                '\x1b[%sm' % deleted_id if color else '',
+                                '\x1b[m' if color else '')
+                                if (tag & 0x7f) == 0x41
+                                and id == ((tag >> 15) & 0xffff)-1
+                                else '',
+                            '\x1b[%sm' % ids[id] if color else '',
+                            '.' if (tag & 0x7f) == 0x40
+                                and id == ((tag >> 15) & 0xffff)-1
+                            else '* ' if not tag & 0x4
+                                and not (tag & 0x7f) == 0x41
+                                and id == ((tag >> 15) & 0xffff)-1
+                            else '\ ' if (tag & 0x7f) == 0x40
+                                and id > ((tag >> 15) & 0xffff)-1
+                            else '/ ' if (tag & 0x7f) == 0x41
+                                and id >= ((tag >> 15) & 0xffff)-1
+                            else '| ',
+                            '\x1b[m' if color else '')
+                            for id in range(count)),
+                    '%s\'%s' % (
+                        '\x1b[%sm' % deleted_id if color else '',
                        '\x1b[m' if color else '')
-                        for id in range(count))
-                    if args.get('ids')
+                        if (tag & 0x7f) == 0x41
+                        and count == ((tag >> 15) & 0xffff)-1
+                        else '')
+                    if args.get('lifetimes')
                else ''))

            # show in-device representation, including some extra
@@ -403,7 +415,7 @@ if __name__ == "__main__":
        action='store_true',
        help="Don't stop parsing on bad commits.")
    parser.add_argument(
-        '-t', '--in-tree',
+        '-i', '--in-tree',
        action='store_true',
        help="Only show tags in the tree.")
    parser.add_argument(
@@ -427,7 +439,7 @@ if __name__ == "__main__":
        action='store_true',
        help="Show alt pointer jumps in the margin.")
    parser.add_argument(
-        '-i', '--ids',
+        '-g', '--lifetimes',
        action='store_true',
        help="Show inserts/deletes of ids in the margin.")
    parser.add_argument(