better compression

This commit is contained in:
AF 2023-08-02 04:15:29 +00:00
parent 3a5734b98b
commit 3474e536fc
2 changed files with 30 additions and 10 deletions

View File

@@ -47,7 +47,7 @@ RUN git fetch && git checkout f81543a34ee363dcc00e8632fd7cfcd4a3478b23
ENV SRCDIR="."
ENV SRCPATTERN="*.[ach]*"
FROM metrics-tmg as metrics-repo
FROM metrics-radn as metrics-repo
FROM metrics-repo as metrics-commits
COPY common.py /code/common.py

View File

@@ -1,4 +1,5 @@
import pickle
from random import randrange
from subprocess import check_output
from common import counter
@@ -6,31 +7,50 @@ from common import counter
args = []
original_commits = check_output(["git", "log", "--pretty=%H", *args], text=True).splitlines()
N = len(original_commits)
filtered_commits = set(original_commits[:: N // min(N, 4096)])
total_changes: dict[str, int] = {}
BITS = 10
total_changes: dict[str, tuple[int, int]] = {}
chosen_parents: dict[str, str | None] = {}
heights: dict[str, int] = {}
included = set()
for i, commit in enumerate(reversed(original_commits)):
print(f"P={i / N:6f}", flush=True)
print(f"C={i / N:6f}", flush=True)
parents = check_output(["git", "log", "--pretty=%P", "-n", "1", commit], text=True).split()
chosen_parent = None
if commit in filtered_commits:
height = 1 + max((heights[parent] for parent in parents), default=0)
mask = ((1 << height.bit_length()) - 1) >> BITS
expected = int.from_bytes(bytes.fromhex(commit), "little")
if mask & (height ^ expected) == 0:
ctr = counter(commit, True)
changes = ctr.total()
changes = ctr.total(), 1
for parent in parents:
pctr = counter(parent, True)
maybe_changes = (pctr - ctr).total() + (ctr - pctr).total() + total_changes[parent]
if parent in included:
_parent = parent
else:
_parent = chosen_parents[parent]
if _parent is None:
continue
assert _parent in included
pctr = counter(_parent, True)
pcc, pch = total_changes[parent]
maybe_changes = (pctr - ctr).total() + (ctr - pctr).total() + pcc, pch + 1
if maybe_changes > changes:
changes = maybe_changes
chosen_parent = parent
included.add(commit)
else:
changes = 0
changes = 0, 1
for parent in parents:
maybe_changes = total_changes[parent]
pcc, pch = total_changes[parent]
maybe_changes = pcc, pch + 1
if maybe_changes > changes:
changes = maybe_changes
chosen_parent = parent
total_changes[commit] = changes
if chosen_parent is not None and chosen_parent not in included:
chosen_parent = chosen_parents[chosen_parent]
assert chosen_parent is None or chosen_parent in included
chosen_parents[commit] = chosen_parent
heights[commit] = height
commit = original_commits[0]
commits = []
while commit is not None: