"""Walk a commit list and record per-commit size and cohort-survival metrics.

The script reads a pickled list of commit identifiers from ``commits.dat``,
processes them in reverse list order, and for each one asks
``common.counter`` for a ``Counter``-like multiset (presumably of source
lines at that commit -- confirm against ``common.counter``).

Two series are written to ``/code/metrics.dat`` as a pickled dict:

* ``"entries"``: one ``(step, total, added, deleted)`` tuple per commit,
  where the last three values are multiset totals.
* ``"cors"``: ``cors[cohort][step]`` is the number of items first added in
  ``cohort`` that are still present at ``step``.
"""

import pickle
from collections import Counter

from common import counter

# NOTE: pickle executes arbitrary code on load; commits.dat is assumed to be
# a trusted, locally produced file -- confirm before pointing this elsewhere.
with open("commits.dat", "rb") as file:
    commits: list[str] = pickle.load(file)

# Number of cohort buckets the commits are spread over (at most 720).
C = min(len(commits), 720)

entries = []  # (step, total, added, deleted) for every processed commit
cors = []  # one list per cohort, one value per processed commit
prev_snapshot = Counter()  # multiset seen at the previous step
prev_tagged = []  # (cohort, line) pairs alive at the previous step

for i, commit in enumerate(reversed(commits)):
    print(f"P={i/len(commits):6f}", flush=True)
    print("running", commit, flush=True)

    snapshot = counter(commit, True)
    gained = snapshot - prev_snapshot
    dropped = prev_snapshot - snapshot
    entries.append((i, snapshot.total(), gained.total(), dropped.total()))

    # Carry over the items that survive from the previous step. Walking the
    # previous list back-to-front means that, when only some copies of a
    # line survive, the most recently tagged copies are the ones kept.
    budget = snapshot & prev_snapshot
    kept_newest_first = []
    for origin, line in reversed(prev_tagged):
        if not budget[line]:
            continue
        budget[line] -= 1
        kept_newest_first.append((origin, line))
    tagged = kept_newest_first[::-1]  # restore the original ordering

    # Everything new at this step belongs to the current cohort bucket.
    cohort = i * C // len(commits)
    tagged.extend((cohort, line) for line in gained.elements())

    per_cohort = Counter(origin for origin, _ in tagged)
    assert len(tagged) == per_cohort.total() == snapshot.total()

    # A cohort seen for the first time gets a zero for every earlier step,
    # so all cohort series stay the same length.
    cors.extend([0] * i for _ in range(len(cors), cohort + 1))
    for origin, series in enumerate(cors):
        series.append(per_cohort[origin])

    prev_snapshot = snapshot
    prev_tagged = tagged

with open("/code/metrics.dat", "wb") as file:
    pickle.dump({"entries": entries, "cors": cors}, file)