scripts: Dropped list/tuple distinction in Rbyd.fetch

Also tweaked how we fetch shrubs, adding Rbyd.fetchshrub and Btree.fetchshrub instead of overloading the bd argument. Oh, and also added --trunk to dbgmtree.py and dbglfs.py. Actually _using_ --trunk isn't advised, since it will probably just result in a corrupted filesystem, but these scripts are for accessing things that aren't normally allowed anyway. The list/tuple distinction was dropped because it was a big ugly hack, unpythonic, and likely to catch users (and myself) by surprise. Now, Rbyd.fetch and friends always require separate block/trunk arguments, and the exercise of deciding which trunk to use is left up to the caller.
2025-03-31 18:09:49 -05:00
parent 1ac3aae92b
commit 82f4fd3c0f
8 changed files with 350 additions and 361 deletions
--- a/scripts/dbglfs.py
+++ b/scripts/dbglfs.py
@@ -81,10 +81,10 @@ def bdgeom(s):
        return int(s, b)

 # parse some rbyd addr encodings
-# 0xa       -> [0xa]
-# 0xa.c     -> [(0xa, 0xc)]
-# 0x{a,b}   -> [0xa, 0xb]
-# 0x{a,b}.c -> [(0xa, 0xc), (0xb, 0xc)]
+# 0xa       -> (0xa,)
+# 0xa.c     -> ((0xa, 0xc),)
+# 0x{a,b}   -> (0xa, 0xb)
+# 0x{a,b}.c -> ((0xa, 0xc), (0xb, 0xc))
 def rbydaddr(s):
    s = s.strip()
    b = 10
@@ -115,7 +115,7 @@ def rbydaddr(s):
        else:
            addr.append(int(s, b))

-    return addr
+    return tuple(addr)

 def crc32c(data, crc=0):
    crc ^= 0xffffffff
@@ -505,9 +505,9 @@ class Rattr:
        self.tag = tag
        self.weight = weight
        if isinstance(blocks, int):
-            self.blocks = [blocks]
+            self.blocks = (blocks,)
        else:
-            self.blocks = list(blocks)
+            self.blocks = blocks
        self.toff = toff
        self.tdata = tdata
        self.data = data
@@ -581,9 +581,9 @@ class Ralt:
        self.tag = tag
        self.weight = weight
        if isinstance(blocks, int):
-            self.blocks = [blocks]
+            self.blocks = (blocks,)
        else:
-            self.blocks = list(blocks)
+            self.blocks = blocks
        self.toff = toff
        self.tdata = tdata
        self.jump = jump
@@ -636,9 +636,9 @@ class Rbyd:
            gcksumdelta=None,
            corrupt=False):
        if isinstance(blocks, int):
-            self.blocks = [blocks]
+            self.blocks = (blocks,)
        else:
-            self.blocks = list(blocks)
+            self.blocks = blocks
        self.trunk = trunk
        self.weight = weight
        self.rev = rev
@@ -681,51 +681,7 @@ class Rbyd:
        return hash((frozenset(self.blocks), self.trunk))

    @classmethod
-    def fetch(cls, bd, blocks, trunk=None):
-        # multiple blocks? unfortunately this must be a list
-        if isinstance(blocks, list):
-            # fetch all blocks
-            rbyds = [cls.fetch(bd, block, trunk) for block in blocks]
-            # determine most recent revision
-            i = 0
-            for i_, rbyd in enumerate(rbyds):
-                # compare with sequence arithmetic
-                if rbyd and (
-                        not rbyds[i]
-                            or not ((rbyd.rev - rbyds[i].rev) & 0x80000000)
-                            or (rbyd.rev == rbyds[i].rev
-                                and rbyd.trunk > rbyds[i].trunk)):
-                    i = i_
-            # keep track of the other blocks
-            rbyd = rbyds[i]
-            rbyd.blocks += tuple(
-                    rbyds[(i+1+j) % len(rbyds)].block
-                        for j in range(len(rbyds)-1))
-            # and patch the gcksumdelta if we have one
-            if rbyd.gcksumdelta is not None:
-                rbyd.gcksumdelta.blocks = rbyd.blocks
-            return rbyd
-
-        block = blocks
-
-        # blocks may also encode trunks
-        block, trunk = (
-                block[0] if isinstance(block, tuple)
-                    else block,
-                trunk if trunk is not None
-                    else block[1] if isinstance(block, tuple)
-                    else None)
-
-        # bd can be either a bd reference or a preread block
-        #
-        # preread blocks can be useful for avoiding race conditions
-        # with cksums and shrubs
-        if isinstance(bd, Bd):
-            # seek/read the block
-            data = bd.readblock(block)
-        else:
-            data = bd
-
+    def _fetch(cls, data, block, trunk=None):
        # fetch the rbyd
        rev = fromle32(data[0:4])
        cksum = 0
@@ -822,6 +778,39 @@ class Rbyd:
                gcksumdelta=gcksumdelta,
                corrupt=not trunk_)

+    @classmethod
+    def fetch(cls, bd, blocks, trunk=None):
+        # multiple blocks?
+        if not isinstance(blocks, int):
+            # fetch all blocks
+            rbyds = [cls.fetch(bd, block, trunk) for block in blocks]
+            # determine most recent revision
+            i = 0
+            for i_, rbyd in enumerate(rbyds):
+                # compare with sequence arithmetic
+                if rbyd and (
+                        not rbyds[i]
+                            or not ((rbyd.rev - rbyds[i].rev) & 0x80000000)
+                            or (rbyd.rev == rbyds[i].rev
+                                and rbyd.trunk > rbyds[i].trunk)):
+                    i = i_
+            # keep track of the other blocks
+            rbyd = rbyds[i]
+            rbyd.blocks += tuple(
+                    rbyds[(i+1+j) % len(rbyds)].block
+                        for j in range(len(rbyds)-1))
+            # and patch the gcksumdelta if we have one
+            if rbyd.gcksumdelta is not None:
+                rbyd.gcksumdelta.blocks = rbyd.blocks
+            return rbyd
+
+        # seek/read the block
+        block = blocks
+        data = bd.readblock(block)
+
+        # fetch the rbyd
+        return cls._fetch(data, block, trunk)
+
    @classmethod
    def fetchck(cls, bd, blocks, trunk, weight, cksum):
        # try to fetch the rbyd normally
@@ -838,6 +827,15 @@ class Rbyd:

        return rbyd

+    @classmethod
+    def fetchshrub(cls, rbyd, trunk):
+        # steal the original rbyd's data
+        #
+        # this helps avoid race conditions with cksums and stuff
+        shrub = cls._fetch(rbyd.data, rbyd.block, trunk)
+        shrub.blocks = rbyd.blocks
+        return shrub
+
    def lookupnext(self, rid, tag=None, *,
            path=False):
        if not self or rid >= self.weight:
@@ -1206,32 +1204,21 @@ class Btree:

    @classmethod
    def fetch(cls, bd, blocks, trunk=None):
-        # bd can either be a bd reference or a tuple of bd + data to
-        # avoid rereads, but we need a real bd reference somehow
-        if isinstance(bd, tuple):
-            bd, data = bd
-        else:
-            bd, data = bd, bd
-        assert isinstance(bd, Bd)
-
        # rbyd fetch does most of the work here
-        rbyd = Rbyd.fetch(data, blocks, trunk)
+        rbyd = Rbyd.fetch(bd, blocks, trunk)
        return cls(bd, rbyd)

    @classmethod
    def fetchck(cls, bd, blocks, trunk, weight, cksum):
-        # bd can either be a bd reference or a tuple of bd + data to
-        # avoid rereads, but we need a real bd reference somehow
-        if isinstance(bd, tuple):
-            bd, data = bd
-        else:
-            bd, data = bd, bd
-        assert isinstance(bd, Bd)
-
        # rbyd fetchck does most of the work here
-        rbyd = Rbyd.fetchck(data, blocks, trunk, weight, cksum)
+        rbyd = Rbyd.fetchck(bd, blocks, trunk, weight, cksum)
        return cls(bd, rbyd)

+    @classmethod
+    def fetchshrub(cls, bd, rbyd, trunk):
+        shrub = Rbyd.fetchshrub(rbyd, trunk)
+        return cls(bd, shrub)
+
    def lookupleaf(self, bid, *,
            path=None,
            depth=None):
@@ -1804,13 +1791,9 @@ class Mdir:
        return hash(frozenset(self.blocks))

    @classmethod
-    def fetch(cls, bd, mid, blocks):
-        rbyd = Rbyd.fetch(bd, blocks)
-        # this affects mbits
-        if isinstance(bd, Bd):
-            return cls(mid, rbyd, mbits=Mtree.mbits_(bd))
-        else:
-            return cls(mid, rbyd)
+    def fetch(cls, bd, mid, blocks, trunk=None):
+        rbyd = Rbyd.fetch(bd, blocks, trunk)
+        return cls(mid, rbyd, mbits=Mtree.mbits_(bd))

    def lookup_(self, mid, tag=None, mask=None, *,
            path=False):
@@ -1999,11 +1982,8 @@ class Mtree:
        return hash(self.mrootanchor)

    @classmethod
-    def fetch(cls, bd, blocks=None, *,
+    def fetch(cls, bd, blocks=None, trunk=None, *,
            depth=None):
-        # we need a real bd reference here
-        assert isinstance(bd, Bd)
-
        # default to blocks 0x{0,1}
        if blocks is None:
            blocks = [0, 1]
@@ -2012,7 +1992,7 @@ class Mtree:
        mbits = Mtree.mbits_(bd)

        # fetch the mrootanchor
-        mrootanchor = Mdir.fetch(bd, -1, blocks)
+        mrootanchor = Mdir.fetch(bd, -1, blocks, trunk)

        # follow the mroot chain to try to find the active mroot
        mroot = mrootanchor
@@ -2736,15 +2716,10 @@ class Bptr:

    @classmethod
    def fetch(cls, bd, rattr, block, off, size, cksize, cksum):
-        # bd can be either a bd reference or a preread block
-        if isinstance(bd, Bd):
-            # seek/read cksize bytes from the block, the actual data
-            # should always be a subset of this
-            bd.seek(block)
-            ckdata = bd.read(cksize)
-        else:
-            # truncate to cksize
-            ckdata = bd[:cksize]
+        # seek/read cksize bytes from the block, the actual data should
+        # always be a subset of cksize
+        bd.seek(block)
+        ckdata = bd.read(cksize)

        return cls(rattr, block, off, size, cksize, cksum, ckdata)

@@ -3303,10 +3278,10 @@ class Lfs:
        return hash(self.mrootanchor)

    @classmethod
-    def fetch(cls, bd, blocks=None, *,
+    def fetch(cls, bd, blocks=None, trunk=None, *,
            depth=None):
        # Mtree does most of the work here
-        mtree = Mtree.fetch(bd, blocks,
+        mtree = Mtree.fetch(bd, blocks, trunk,
                depth=depth)
        return cls(bd, mtree)

@@ -3597,8 +3572,7 @@ class Lfs:
            if (self.struct is not None
                    and (self.struct.tag & ~0x3) == TAG_BSHRUB):
                weight, trunk = fromshrub(self.struct.data)
-                self.bshrub = Btree.fetch(
-                        (lfs.bd, mdir.data), self.mdir.blocks, trunk)
+                self.bshrub = Btree.fetchshrub(lfs.bd, mdir.rbyd, trunk)
            elif (self.struct is not None
                    and (self.struct.tag & ~0x3) == TAG_BTREE):
                weight, block, trunk, cksum = frombtree(self.struct.data)
@@ -4393,6 +4367,7 @@ def dbg_files(lfs, paths,


 def main(disk, mroots=None, paths=None, *,
+        trunk=None,
        block_size=None,
        block_count=None,
        color='auto',
@@ -4426,9 +4401,19 @@ def main(disk, mroots=None, paths=None, *,
            block_count = block_count_

    # flatten mroots, default to 0x{0,1}
-    if not mroots:
-        mroots = [(0,1)]
-    mroots = [block for mroots_ in mroots for block in mroots_]
+    mroots = list(it.chain.from_iterable(mroots)) if mroots else [0, 1]
+
+    # mroots may also encode trunks
+    mroots, trunk = (
+            [block[0] if isinstance(block, tuple)
+                    else block
+                for block in mroots],
+            trunk if trunk is not None
+                else ft.reduce(
+                    lambda x, y: y,
+                    (block[1] for block in mroots
+                        if isinstance(block, tuple)),
+                    None))

    # we seek around a bunch, so just keep the disk open
    with open(disk, 'rb') as f:
@@ -4439,7 +4424,7 @@ def main(disk, mroots=None, paths=None, *,

        # fetch the filesystem
        bd = Bd(f, block_size, block_count)
-        lfs = Lfs.fetch(bd, mroots)
+        lfs = Lfs.fetch(bd, mroots, trunk)

        # print some information about the filesystem
        print('littlefs%s v%s.%s %sx%s %s w%s.%s, rev %08x, cksum %08x%s' % (
@@ -4503,7 +4488,7 @@ if __name__ == "__main__":
        def __call__(self, parser, namespace, values, option):
            for value in values:
                # mroot?
-                if isinstance(value, list):
+                if not isinstance(value, str):
                    if getattr(namespace, 'mroots', None) is None:
                        namespace.mroots = []
                    namespace.mroots.append(value)
@@ -4525,6 +4510,10 @@ if __name__ == "__main__":
            action=AppendMrootOrPath,
            help="Paths to show, must start with a leading slash. Defaults "
                "to the root directory.")
+    parser.add_argument(
+            '--trunk',
+            type=lambda x: int(x, 0),
+            help="Use this offset as the trunk of the mroots.")
    parser.add_argument(
            '-b', '--block-size',
            type=bdgeom,