scripts: Added treemapd3.py

Like treemap.py, but outputting an svg file, which is quite a bit more
useful.

Things svg is _not_:

- A simple vector graphics format

Things svg _is_:

- A surprisingly powerful high-level graphics language.

I might have to use svgs as an output format more often. It's
surprisingly easy to generate graphics without worrying about low-level
rendering details.

---

Aside from the extra flags for svg details like font, padding,
background colors, etc, the main difference between treemap.py and
treemapd3.py is the addition of the --nested mode, which renders a
containing tile for each recursive group (each -b/--by field).

There's no way --nested would've worked in treemap.py. The main benefit
of --nested is the extra label on each subgroup, and labels are already
hard enough to read in treemap.py.

Other than that, treemapd3.py is mostly the same as treemap.py, but with
a resolution that's actually readable.
This commit is contained in:
Christopher Haster
2025-02-16 04:26:46 -06:00
parent d6c909e724
commit 2135c6a003
4 changed files with 1030 additions and 144 deletions

View File

@@ -320,8 +320,8 @@ class Canvas:
class Tile:
def __init__(self, key, children,
x=None, y=None, width=None, height=None, *,
depth=None,
label=None,
char=None,
color=None):
self.key = key
if isinstance(children, list):
@@ -335,8 +335,8 @@ class Tile:
self.y = y
self.width = width
self.height = height
self.depth = depth
self.label = label
self.char = char
self.color = color
def __repr__(self):
@@ -366,17 +366,12 @@ class Tile:
tiles__.append(Tile.merge(t, key))
tiles_ = tiles__
return Tile(prefix, tiles_)
return Tile(prefix, tiles_, depth=len(prefix))
def __lt__(self, other):
# tiles order by their value, this is what lets Tile.sort call
# children.sort() without a key function
return self.value < other.value
# sort recursively
def sort(self):
self.children.sort(reverse=True)
for t in self.children:
t.sort()
# recursive traversals
def tiles(self):
yield self
for child in self.children:
@@ -387,15 +382,51 @@ class Tile:
if not t.children:
yield t
# sort recursively
def sort(self):
    # sort every children list in this subtree in place, largest
    # tile first, ordering comes from Tile.__lt__ which compares values
    #
    # walk the tree with an explicit stack, the order we visit tiles
    # in doesn't matter since each children list is sorted on its own
    pending = [self]
    while pending:
        tile = pending.pop()
        tile.children.sort(reverse=True)
        pending.extend(tile.children)
# recursive align to int boundaries
def align(self):
    # snap this tile, and then each of its children, to integer
    # coordinates, x/y/width/height are overwritten in place
    #
    # we truncate the corner points, not the width/height, and nudge
    # everything by +0.1 first, both help minimize rounding errors
    left, top = int(self.x+0.1), int(self.y+0.1)
    right = int(self.x+self.width+0.1)
    bottom = int(self.y+self.height+0.1)

    self.x, self.y = left, top
    self.width, self.height = right-left, bottom-top

    # recurse
    for child in self.children:
        child.align()
# return some interesting info about these tiles
def stat(self):
    # summarize the leaf tiles under this tile, returns a dict with
    # total, mean, stddev, min, and max
    #
    # total is this tile's own value, everything else is computed
    # from the values of the leaves
    values = [t.value for t in self.leaves()]
    # avoid dividing by zero if there are no leaves
    count = max(len(values), 1)
    mean = self.value / count
    variance = sum((v - mean)**2 for v in values) / count
    return {
        'total': self.value,
        'mean': mean,
        'stddev': mt.sqrt(variance),
        'min': min(values, default=0),
        'max': max(values, default=0),
    }
# our partitioning schemes
def partition_binary(tile, *,
partition=None):
partition = partition or partition_binary
def partition_binary(children, total, x, y, width, height):
sums = [0]
for t in tile.children:
for t in children:
sums.append(sums[-1] + t.value)
# recursively partition into a roughly weight-balanced binary tree
@@ -405,10 +436,10 @@ def partition_binary(tile, *,
return
# single child? assign the partition
elif i == j-1:
tile.children[i].x = x
tile.children[i].y = y
tile.children[i].width = width
tile.children[i].height = height
children[i].x = x
children[i].y = y
children[i].width = width
children[i].height = height
return
# binary search to find best split index
@@ -434,90 +465,58 @@ def partition_binary(tile, *,
partition_(i, k, l, x, y, width, dy)
partition_(k, j, r, x, y+dy, width, height-dy)
partition_(0, len(tile.children), tile.value,
tile.x, tile.y, tile.width, tile.height)
# recursively partition
for t in tile.children:
partition(t, partition=partition)
def partition_slice(tile, *,
partition=None):
partition = partition or partition_slice
partition_(0, len(children), total, x, y, width, height)
def partition_slice(children, total, x, y, width, height):
# give each child a slice
x_ = tile.x
for t in tile.children:
x_ = x
for t in children:
t.x = x_
t.y = tile.y
t.width = (t.value / tile.value) * tile.width
t.height = tile.height
t.y = y
t.width = (t.value / total) * width
t.height = height
x_ += t.width
# recursively partition
for t in tile.children:
partition(t, partition=partition)
def partition_dice(tile, *,
partition=None):
partition = partition or partition_dice
def partition_dice(children, total, x, y, width, height):
# give each child a slice
y_ = tile.y
for t in tile.children:
t.x = tile.x
y_ = y
for t in children:
t.x = x
t.y = y_
t.width = tile.width
t.height = (t.value / tile.value) * tile.height
t.width = width
t.height = (t.value / total) * height
y_ += t.height
# recursively partition
for t in tile.children:
partition(t, partition=partition)
def partition_slice_and_dice(tile, *,
partition=None):
partition_slice(tile, partition=partition or partition_dice_and_slice)
def partition_dice_and_slice(tile, *,
partition=None):
partition_dice(tile, partition=partition or partition_slice_and_dice)
def partition_squarify(tile, *,
partition=None, aspect_ratio=None):
partition = partition or partition_squarify
# derive target aspect ratio from top-level tile
aspect_ratio = aspect_ratio or (tile.width, tile.height)
def partition_squarify(children, total, x, y, width, height):
# this algorithm is described here:
# https://www.win.tue.nl/~vanwijk/stm.pdf
i = 0
x = tile.x
y = tile.y
value = tile.value
width = tile.width
height = tile.height
# this is our target aspect ratio, note we don't really care
# about width vs height until actually slicing
ratio = aspect_ratio[0] / aspect_ratio[1]
x_ = x
y_ = y
total_ = total
width_ = width
height_ = height
# derive target aspect ratio from top-level tile, note we don't
# really care about width vs height until actually slicing
ratio = max(width/height, height/width)
while i < len(tile.children):
while i < len(children):
# calculate initial aspect ratio
sum_ = tile.children[i].value
min_ = tile.children[i].value
max_ = tile.children[i].value
w = value * (ratio / max(width/height, height/width))
sum_ = children[i].value
min_ = children[i].value
max_ = children[i].value
w = total_ * (ratio / max(width_/height_, height_/width_))
ratio_ = max((max_*w)/(sum_**2), (sum_**2)/(min_*w))
# keep adding children to this row/col until it starts to hurt
# our aspect ratio
j = i + 1
while j < len(tile.children):
sum__ = sum_ + tile.children[j].value
min__ = min(min_, tile.children[j].value)
max__ = max(max_, tile.children[j].value)
while j < len(children):
sum__ = sum_ + children[j].value
min__ = min(min_, children[j].value)
max__ = max(max_, children[j].value)
ratio__ = max((max__*w)/(sum__**2), (sum__**2)/(min__*w))
if ratio__ > ratio_:
break
@@ -529,33 +528,23 @@ def partition_squarify(tile, *,
j += 1
# vertical col? dice horizontally?
if width > height:
dx = (sum_ / value) * width
partition_dice(Tile(
tile.key, tile.children[i:j],
x, y, dx, height),
partition=lambda *_, **__: ())
x += dx
width -= dx
if width_ > height_:
dx = (sum_ / total_) * width_
partition_dice(children[i:j], sum_, x_, y_, dx, height_)
x_ += dx
width_ -= dx
# horizontal row? slice vertically?
else:
dy = (sum_ / value) * height
partition_slice(Tile(
tile.key, tile.children[i:j],
x, y, width, dy),
partition=lambda *_, **__: ())
y += dy
height -= dy
dy = (sum_ / total_) * height_
partition_slice(children[i:j], sum_, x_, y_, width_, dy)
y_ += dy
height_ -= dy
# start partitioning the other direction
value -= sum_
total_ -= sum_
i = j
# recursively partition
for t in tile.children:
partition(t, partition=partition, aspect_ratio=aspect_ratio)
def main(csv_paths, *,
by=None,
@@ -572,6 +561,8 @@ def main(csv_paths, *,
no_header=False,
to_scale=None,
aspect_ratio=(1,1),
title=None,
padding=0,
**args):
# figure out what color should be
if color == 'auto':
@@ -601,13 +592,11 @@ def main(csv_paths, *,
width_ = shutil.get_terminal_size((80, 5))[0]
if height is None:
height_ = 1
height_ = 2 if title is not None or not no_header else 1
elif height:
height_ = height
else:
height_ = shutil.get_terminal_size((80, 5))[1]
# make space for shell prompt and stuff
height_ = max(height_-(1 if no_header else 2), 1)
# first collect results from CSV files
fields_, results = collect(csv_paths, defines)
@@ -651,14 +640,14 @@ def main(csv_paths, *,
# use colors for top of tree
for i, t in enumerate(tile.children):
for t_ in t.tiles():
t_.color = i % len(colors_)
t_.color = colors_[i % len(colors_)]
# and chars for bottom of tree
for i, t in enumerate(tile.leaves()):
t.char = i % len(chars_)
t.char = chars_[i % len(chars_)]
# scale width/height if requested now that we have our data
if to_scale and (width is None or height is None):
if to_scale and (width is None or height is None) and tile.value:
# scale if needed
if braille:
xscale, yscale = 2, 4
@@ -690,7 +679,7 @@ def main(csv_paths, *,
# create a canvas
canvas = Canvas(width_, height_,
color=color,
dots=dots or chars is None,
dots=dots,
braille=braille)
# recursively partition tiles
@@ -698,32 +687,74 @@ def main(csv_paths, *,
tile.y = 0
tile.width = canvas.width
tile.height = canvas.height
if tile.value:
if args.get('binary'):
partition_binary(tile)
elif args.get('slice'):
partition_slice(tile)
elif args.get('dice'):
partition_dice(tile)
elif args.get('slice_and_dice'):
partition_slice_and_dice(tile)
elif args.get('dice_and_slice'):
partition_dice_and_slice(tile)
elif args.get('squarify'):
partition_squarify(tile)
def partition(tile):
# apply top padding
if tile.depth == 0:
tile.x += padding
tile.y += padding
tile.width -= min(padding, tile.width)
tile.height -= min(padding, tile.height)
x__ = tile.x
y__ = tile.y
width__ = tile.width
height__ = tile.height
# create space for header
if title is not None or not no_header:
y__ += 1
height__ -= min(1, height__)
else:
# default to binary partitioning
partition_binary(tile)
# apply bottom padding
if not tile.children:
tile.width -= min(padding, tile.width)
tile.height -= min(padding, tile.height)
x__ = tile.x
y__ = tile.y
width__ = tile.width
height__ = tile.height
# partition via requested scheme
if tile.children:
if args.get('binary'):
partition_binary(tile.children, tile.value,
x__, y__, width__, height__)
elif (args.get('slice')
or (args.get('slice_and_dice') and (tile.depth & 1) == 0)
or (args.get('dice_and_slice') and (tile.depth & 1) == 1)):
partition_slice(tile.children, tile.value,
x__, y__, width__, height__)
elif (args.get('dice')
or (args.get('slice_and_dice') and (tile.depth & 1) == 1)
or (args.get('dice_and_slice') and (tile.depth & 1) == 0)):
partition_dice(tile.children, tile.value,
x__, y__, width__, height__)
elif args.get('squarify'):
partition_squarify(tile.children, tile.value,
x__, y__, width__, height__)
else:
# default to binary partitioning
partition_binary(tile.children, tile.value,
x__, y__, width__, height__)
# recursively partition
for t in tile.children:
partition(t)
partition(tile)
# align to pixel boundaries
tile.align()
# render to canvas
labels_ = []
for t in tile.leaves():
# this extra +0.1 and using points instead of width/height is
# to help minimize rounding errors
x__ = int(t.x+0.1)
y__ = int(t.y+0.1)
width__ = int(t.x+t.width+0.1) - x__
height__ = int(t.y+t.height+0.1) - y__
x__ = t.x
y__ = t.y
width__ = t.width
height__ = t.height
# skip anything with zero width/height after aligning things
if width__ == 0 or height__ == 0:
continue
@@ -741,8 +772,8 @@ def main(csv_paths, *,
if chars is None
and t.key
and t.key[-1]
else chars_[t.char or 0]),
color=colors_[t.color or 0])
else t.char if t.char is not None else chars_[0]),
color=t.color if t.color is not None else colors_[0])
if labels:
if t.label is not None:
@@ -758,17 +789,20 @@ def main(csv_paths, *,
# print some summary info
if not no_header:
leaves = list(tile.leaves())
mean = tile.value / max(len(leaves), 1)
stddev = mt.sqrt(sum((t.value - mean)**2 for v in leaves)
/ max(len(leaves), 1))
min_ = min((t.value for t in leaves), default=0)
max_ = max((t.value for t in leaves), default=0)
print('total %d, avg %d +-%.1fσ, min %d, max %d' % (
tile.value, mean, stddev, min_, max_))
stat = tile.stat()
stat_ = 'total %d, avg %d +-%dσ, min %d, max %d' % (
stat['total'],
stat['mean'], stat['stddev'],
stat['min'], stat['max'])
if title and not no_header:
print('%s%*s%s' % (title, width_-len(stat_)-len(title), '', stat_))
elif title:
print(title)
elif not no_header:
print(stat_)
# draw canvas
for row in range(height_):
for row in range(1 if title or not no_header else 0, height_):
line = canvas.draw(row)
print(line)
@@ -886,10 +920,18 @@ if __name__ == "__main__":
help="Scale the resulting treemap such that 1 pixel ~= 1/scale "
"units. Defaults to scale=1. ")
parser.add_argument(
'--aspect-ratio',
'-R', '--aspect-ratio',
type=lambda x: tuple(float(v) for v in x.split(':', 1)),
default=(1, 1),
help="Aspect ratio to use with --to-scale. Defaults to 1:1.")
parser.add_argument(
'--title',
help="Add a title.")
parser.add_argument(
'--padding',
type=float,
default=0,
help="Padding to add to each level of the treemap. Defaults to 0.")
sys.exit(main(**{k: v
for k, v in vars(parser.parse_intermixed_args()).items()
if v is not None}))