forked from Imagelibrary/littlefs
scripts: Adopted simpler+faster heuristic for symbol->dwarf mapping
After tinkering around with the scripts for a bit, I've started to
realize difflib is kinda... really slow...
I don't think this is strictly difflib's fault. It's a pure-Python
library (proof of concept?), it may be prioritizing quality over speed,
and I may be throwing too much data at it.
difflib does have quick_ratio() and real_quick_ratio() for faster
comparisons, but while looking into these for correctness, I realized
there's a simpler heuristic we can use since GCC's optimized names seem
strictly additive: Choose the name that matches with the smallest prefix
and suffix.
So comparing, say, lfsr_rbyd_lookup to __lfsr_rbyd_lookup.constprop.0:
      lfsr_rbyd_lookup
    __lfsr_rbyd_lookup.constprop.0
    |'------.-------''----.-----'
    '-------|-----.   .---'
            v     v   v
    key: (matches, 2, 12)
Note we prioritize the smallest prefix, since it seems GCC's optimized
names only ever add suffixes. We also now fail to match if the dwarf name
is not a substring of the symbol, instead of just finding the most
similar-looking symbol.
This results in both faster and more robust symbol->dwarf mapping:
before: time code.py -Y: 0.393s
after: time code.py -Y: 0.152s
(this is WITH the fast dict lookup on exact matches!)
This also drops difflib from the scripts. So one less dependency to
worry about.
This commit is contained in:
@@ -375,8 +375,6 @@ class DwarfInfo:
|
|||||||
return self.entries.get(k, d)
|
return self.entries.get(k, d)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
import difflib
|
|
||||||
|
|
||||||
# organize entries by name
|
# organize entries by name
|
||||||
if not hasattr(self, '_by_name'):
|
if not hasattr(self, '_by_name'):
|
||||||
self._by_name = {}
|
self._by_name = {}
|
||||||
@@ -384,20 +382,24 @@ class DwarfInfo:
|
|||||||
if entry.name is not None:
|
if entry.name is not None:
|
||||||
self._by_name[entry.name] = entry
|
self._by_name[entry.name] = entry
|
||||||
|
|
||||||
# exact match? avoid difflib if we can for speed
|
# exact match? do a quick lookup
|
||||||
if k in self._by_name:
|
if k in self._by_name:
|
||||||
return self._by_name[k]
|
return self._by_name[k]
|
||||||
# find the best matching dwarf entry with difflib
|
# find the best matching dwarf entry with a simple
|
||||||
|
# heuristic
|
||||||
#
|
#
|
||||||
# this can be different from the actual symbol because
|
# this can be different from the actual symbol because
|
||||||
# of optimization passes
|
# of optimization passes
|
||||||
else:
|
else:
|
||||||
name, entry = max(
|
def key(entry):
|
||||||
self._by_name.items(),
|
i = k.find(entry.name)
|
||||||
key=lambda entry: difflib.SequenceMatcher(
|
if i == -1:
|
||||||
None, entry[0], k, False).ratio(),
|
return None
|
||||||
default=(None, None))
|
return (i, len(k)-(i+len(entry.name)), k)
|
||||||
return entry
|
return min(
|
||||||
|
filter(key, self._by_name.values()),
|
||||||
|
key=key,
|
||||||
|
default=d)
|
||||||
|
|
||||||
def __getitem__(self, k):
|
def __getitem__(self, k):
|
||||||
v = self.get(k)
|
v = self.get(k)
|
||||||
|
|||||||
@@ -384,8 +384,6 @@ class DwarfInfo:
|
|||||||
return self.entries.get(k, d)
|
return self.entries.get(k, d)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
import difflib
|
|
||||||
|
|
||||||
# organize entries by name
|
# organize entries by name
|
||||||
if not hasattr(self, '_by_name'):
|
if not hasattr(self, '_by_name'):
|
||||||
self._by_name = {}
|
self._by_name = {}
|
||||||
@@ -393,20 +391,24 @@ class DwarfInfo:
|
|||||||
if entry.name is not None:
|
if entry.name is not None:
|
||||||
self._by_name[entry.name] = entry
|
self._by_name[entry.name] = entry
|
||||||
|
|
||||||
# exact match? avoid difflib if we can for speed
|
# exact match? do a quick lookup
|
||||||
if k in self._by_name:
|
if k in self._by_name:
|
||||||
return self._by_name[k]
|
return self._by_name[k]
|
||||||
# find the best matching dwarf entry with difflib
|
# find the best matching dwarf entry with a simple
|
||||||
|
# heuristic
|
||||||
#
|
#
|
||||||
# this can be different from the actual symbol because
|
# this can be different from the actual symbol because
|
||||||
# of optimization passes
|
# of optimization passes
|
||||||
else:
|
else:
|
||||||
name, entry = max(
|
def key(entry):
|
||||||
self._by_name.items(),
|
i = k.find(entry.name)
|
||||||
key=lambda entry: difflib.SequenceMatcher(
|
if i == -1:
|
||||||
None, entry[0], k, False).ratio(),
|
return None
|
||||||
default=(None, None))
|
return (i, len(k)-(i+len(entry.name)), k)
|
||||||
return entry
|
return min(
|
||||||
|
filter(key, self._by_name.values()),
|
||||||
|
key=key,
|
||||||
|
default=d)
|
||||||
|
|
||||||
def __getitem__(self, k):
|
def __getitem__(self, k):
|
||||||
v = self.get(k)
|
v = self.get(k)
|
||||||
|
|||||||
@@ -375,8 +375,6 @@ class DwarfInfo:
|
|||||||
return self.entries.get(k, d)
|
return self.entries.get(k, d)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
import difflib
|
|
||||||
|
|
||||||
# organize entries by name
|
# organize entries by name
|
||||||
if not hasattr(self, '_by_name'):
|
if not hasattr(self, '_by_name'):
|
||||||
self._by_name = {}
|
self._by_name = {}
|
||||||
@@ -384,20 +382,24 @@ class DwarfInfo:
|
|||||||
if entry.name is not None:
|
if entry.name is not None:
|
||||||
self._by_name[entry.name] = entry
|
self._by_name[entry.name] = entry
|
||||||
|
|
||||||
# exact match? avoid difflib if we can for speed
|
# exact match? do a quick lookup
|
||||||
if k in self._by_name:
|
if k in self._by_name:
|
||||||
return self._by_name[k]
|
return self._by_name[k]
|
||||||
# find the best matching dwarf entry with difflib
|
# find the best matching dwarf entry with a simple
|
||||||
|
# heuristic
|
||||||
#
|
#
|
||||||
# this can be different from the actual symbol because
|
# this can be different from the actual symbol because
|
||||||
# of optimization passes
|
# of optimization passes
|
||||||
else:
|
else:
|
||||||
name, entry = max(
|
def key(entry):
|
||||||
self._by_name.items(),
|
i = k.find(entry.name)
|
||||||
key=lambda entry: difflib.SequenceMatcher(
|
if i == -1:
|
||||||
None, entry[0], k, False).ratio(),
|
return None
|
||||||
default=(None, None))
|
return (i, len(k)-(i+len(entry.name)), k)
|
||||||
return entry
|
return min(
|
||||||
|
filter(key, self._by_name.values()),
|
||||||
|
key=key,
|
||||||
|
default=d)
|
||||||
|
|
||||||
def __getitem__(self, k):
|
def __getitem__(self, k):
|
||||||
v = self.get(k)
|
v = self.get(k)
|
||||||
|
|||||||
@@ -272,8 +272,6 @@ class DwarfInfo:
|
|||||||
return self.entries.get(k, d)
|
return self.entries.get(k, d)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
import difflib
|
|
||||||
|
|
||||||
# organize entries by name
|
# organize entries by name
|
||||||
if not hasattr(self, '_by_name'):
|
if not hasattr(self, '_by_name'):
|
||||||
self._by_name = {}
|
self._by_name = {}
|
||||||
@@ -281,20 +279,24 @@ class DwarfInfo:
|
|||||||
if entry.name is not None:
|
if entry.name is not None:
|
||||||
self._by_name[entry.name] = entry
|
self._by_name[entry.name] = entry
|
||||||
|
|
||||||
# exact match? avoid difflib if we can for speed
|
# exact match? do a quick lookup
|
||||||
if k in self._by_name:
|
if k in self._by_name:
|
||||||
return self._by_name[k]
|
return self._by_name[k]
|
||||||
# find the best matching dwarf entry with difflib
|
# find the best matching dwarf entry with a simple
|
||||||
|
# heuristic
|
||||||
#
|
#
|
||||||
# this can be different from the actual symbol because
|
# this can be different from the actual symbol because
|
||||||
# of optimization passes
|
# of optimization passes
|
||||||
else:
|
else:
|
||||||
name, entry = max(
|
def key(entry):
|
||||||
self._by_name.items(),
|
i = k.find(entry.name)
|
||||||
key=lambda entry: difflib.SequenceMatcher(
|
if i == -1:
|
||||||
None, entry[0], k, False).ratio(),
|
return None
|
||||||
default=(None, None))
|
return (i, len(k)-(i+len(entry.name)), k)
|
||||||
return entry
|
return min(
|
||||||
|
filter(key, self._by_name.values()),
|
||||||
|
key=key,
|
||||||
|
default=d)
|
||||||
|
|
||||||
def __getitem__(self, k):
|
def __getitem__(self, k):
|
||||||
v = self.get(k)
|
v = self.get(k)
|
||||||
|
|||||||
Reference in New Issue
Block a user