better compression

This commit is contained in:
AF 2023-08-02 04:15:29 +00:00
parent 3a5734b98b
commit 3474e536fc
2 changed files with 30 additions and 10 deletions

View File

@@ -47,7 +47,7 @@ RUN git fetch && git checkout f81543a34ee363dcc00e8632fd7cfcd4a3478b23
ENV SRCDIR="." ENV SRCDIR="."
ENV SRCPATTERN="*.[ach]*" ENV SRCPATTERN="*.[ach]*"
FROM metrics-tmg as metrics-repo FROM metrics-radn as metrics-repo
FROM metrics-repo as metrics-commits FROM metrics-repo as metrics-commits
COPY common.py /code/common.py COPY common.py /code/common.py

View File

@@ -1,4 +1,5 @@
import pickle import pickle
from random import randrange
from subprocess import check_output from subprocess import check_output
from common import counter from common import counter
@@ -6,31 +7,50 @@ from common import counter
args = [] args = []
original_commits = check_output(["git", "log", "--pretty=%H", *args], text=True).splitlines() original_commits = check_output(["git", "log", "--pretty=%H", *args], text=True).splitlines()
N = len(original_commits) N = len(original_commits)
filtered_commits = set(original_commits[:: N // min(N, 4096)]) BITS = 10
total_changes: dict[str, int] = {} total_changes: dict[str, tuple[int, int]] = {}
chosen_parents: dict[str, str | None] = {} chosen_parents: dict[str, str | None] = {}
heights: dict[str, int] = {}
included = set()
for i, commit in enumerate(reversed(original_commits)): for i, commit in enumerate(reversed(original_commits)):
print(f"P={i / N:6f}", flush=True) print(f"C={i / N:6f}", flush=True)
parents = check_output(["git", "log", "--pretty=%P", "-n", "1", commit], text=True).split() parents = check_output(["git", "log", "--pretty=%P", "-n", "1", commit], text=True).split()
chosen_parent = None chosen_parent = None
if commit in filtered_commits: height = 1 + max((heights[parent] for parent in parents), default=0)
mask = ((1 << height.bit_length()) - 1) >> BITS
expected = int.from_bytes(bytes.fromhex(commit), "little")
if mask & (height ^ expected) == 0:
ctr = counter(commit, True) ctr = counter(commit, True)
changes = ctr.total() changes = ctr.total(), 1
for parent in parents: for parent in parents:
pctr = counter(parent, True) if parent in included:
maybe_changes = (pctr - ctr).total() + (ctr - pctr).total() + total_changes[parent] _parent = parent
else:
_parent = chosen_parents[parent]
if _parent is None:
continue
assert _parent in included
pctr = counter(_parent, True)
pcc, pch = total_changes[parent]
maybe_changes = (pctr - ctr).total() + (ctr - pctr).total() + pcc, pch + 1
if maybe_changes > changes: if maybe_changes > changes:
changes = maybe_changes changes = maybe_changes
chosen_parent = parent chosen_parent = parent
included.add(commit)
else: else:
changes = 0 changes = 0, 1
for parent in parents: for parent in parents:
maybe_changes = total_changes[parent] pcc, pch = total_changes[parent]
maybe_changes = pcc, pch + 1
if maybe_changes > changes: if maybe_changes > changes:
changes = maybe_changes changes = maybe_changes
chosen_parent = parent chosen_parent = parent
total_changes[commit] = changes total_changes[commit] = changes
if chosen_parent is not None and chosen_parent not in included:
chosen_parent = chosen_parents[chosen_parent]
assert chosen_parent is None or chosen_parent in included
chosen_parents[commit] = chosen_parent chosen_parents[commit] = chosen_parent
heights[commit] = height
commit = original_commits[0] commit = original_commits[0]
commits = [] commits = []
while commit is not None: while commit is not None: