scripts: Adopted crc32c lib in parity.py

Computing the parity via the crc32c lib is actually faster than a byte-wise xor in Python:

  parity.py disk (1MiB) w/  crc32c lib: 0m0.027s
  parity.py disk (1MiB) w/o crc32c lib: 0m0.051s

There's probably some other library that can do this even faster, but
parity.py is not a critical script.
This commit is contained in:
Christopher Haster
2025-04-07 16:33:04 -05:00
parent 3ff25a4fdf
commit 33e2e5b1db

View File

@@ -11,6 +11,11 @@ import sys
import functools as ft import functools as ft
import operator as op import operator as op
try:
import crc32c as crc32c_lib
except ModuleNotFoundError:
crc32c_lib = None
def openio(path, mode='r', buffering=-1): def openio(path, mode='r', buffering=-1):
# allow '-' for stdin/stdout # allow '-' for stdin/stdout
@@ -23,6 +28,17 @@ def openio(path, mode='r', buffering=-1):
else: else:
return open(path, mode, buffering) return open(path, mode, buffering)
def crc32c(data, crc=0):
    """Return the crc32c checksum of data, continuing from crc.

    data is iterated as a sequence of integer byte values. crc is the
    checksum of any data that came before, which lets callers feed a
    large input through block by block.

    Uses the optional crc32c library when the module-level crc32c_lib
    is set, otherwise falls back to a bit-by-bit pure Python loop.
    """
    # fast path, let the library do the work
    if crc32c_lib is not None:
        return crc32c_lib.crc32c(data, crc)

    # slow path, the running state is kept inverted between the
    # initial and final xor with 0xffffffff
    state = crc ^ 0xffffffff
    for byte in data:
        state ^= byte
        for _ in range(8):
            # shift out the low bit, xoring in the polynomial constant
            # whenever that bit was set
            lsb = state & 1
            state >>= 1
            if lsb:
                state ^= 0x82f63b78
    return state ^ 0xffffffff
def popc(x):
    # population count, the number of '1' digits in bin(x)
    digits = bin(x)
    return sum(1 for d in digits if d == '1')
@@ -34,18 +50,12 @@ def main(paths, **args):
# interpret as sequence of hex bytes # interpret as sequence of hex bytes
if args.get('hex'): if args.get('hex'):
bytes_ = [b for path in paths for b in path.split()] bytes_ = [b for path in paths for b in path.split()]
print('%01x' % parity(ft.reduce( print('%01x' % parity(crc32c(bytes(int(b, 16) for b in bytes_))))
op.xor,
bytes(int(b, 16) for b in bytes_),
0)))
# interpret as strings # interpret as strings
elif args.get('string'): elif args.get('string'):
for path in paths: for path in paths:
print('%01x' % parity(ft.reduce( print('%01x' % parity(crc32c(path.encode('utf8'))))
op.xor,
path.encode('utf8'),
0)))
# default to interpreting as paths # default to interpreting as paths
else: else:
@@ -54,20 +64,20 @@ def main(paths, **args):
for path in paths: for path in paths:
with openio(path or '-', 'rb') as f: with openio(path or '-', 'rb') as f:
# calculate parity # calculate crc, crc32c preserves parity
xor = 0 crc = 0
while True: while True:
block = f.read(io.DEFAULT_BUFFER_SIZE) block = f.read(io.DEFAULT_BUFFER_SIZE)
if not block: if not block:
break break
xor = ft.reduce(op.xor, block, xor) crc = crc32c(block, crc)
# print what we found # print what we found
if path is not None: if path is not None:
print('%01x %s' % (parity(xor), path)) print('%01x %s' % (parity(crc), path))
else: else:
print('%01x' % parity(xor)) print('%01x' % parity(crc))
if __name__ == "__main__": if __name__ == "__main__":