85 lines
2.2 KiB
Python
85 lines
2.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Sew all frame CSVs in order; deduplicate overlap at boundaries (scroll down)."""
|
|
import csv
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
from row_eq import row_equal
|
|
|
|
|
|
def frame_number_from_name(name):
    """Return the frame number encoded in a ``<digits>.csv`` file name.

    Args:
        name: Bare file name (no directory part), e.g. ``"12.csv"``.

    Returns:
        The digits before ``.csv`` as an ``int``, or ``None`` when ``name``
        is not exactly one or more digits followed by ``.csv``.
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let a name like "3.csv\n" slip through.
    m = re.fullmatch(r"(\d+)\.csv", name)
    if m is None:
        return None
    return int(m.group(1))
|
|
|
|
|
|
def list_frame_csvs(frames_dir):
    """List the frame CSV file names found in ``frames_dir``, in frame order.

    Only directory entries for which ``frame_number_from_name`` returns a
    number are kept. The result is sorted by ``(frame number, file name)``,
    so numeric order wins over lexical order (``2.csv`` before ``10.csv``).

    Args:
        frames_dir: Directory to scan (passed to ``os.listdir``).

    Returns:
        A list of bare file names (no directory part).
    """
    numbered = (
        (frame_number_from_name(entry), entry)
        for entry in os.listdir(frames_dir)
    )
    ordered = sorted(pair for pair in numbered if pair[0] is not None)
    return [entry for _, entry in ordered]
|
|
|
|
|
|
def read_csv(path):
    """Read the UTF-8 CSV file at ``path`` and return its rows.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A list of rows, each row being the list of cell strings produced by
        ``csv.reader``.
    """
    # newline="" leaves line-ending handling to the csv module.
    with open(path, encoding="utf-8", newline="") as handle:
        reader = csv.reader(handle)
        rows = list(reader)
    return rows
|
|
|
|
|
|
def write_csv(path, rows):
    """Write ``rows`` to ``path`` as a UTF-8 CSV file, replacing any existing file.

    Args:
        path: Destination file path; opened in ``"w"`` mode.
        rows: Iterable of rows, each an iterable of cell values, handed to
            ``csv.writer(...).writerows``.
    """
    # newline="" lets the csv writer emit its own line terminators untouched.
    with open(path, "w", encoding="utf-8", newline="") as handle:
        writer = csv.writer(handle)
        writer.writerows(rows)
|
|
|
|
|
|
def overlap_length(acc_rows, next_rows, num_cols=6):
    """Return how many trailing rows of ``acc_rows`` repeat at the start of ``next_rows``.

    Candidate lengths are tried from the longest possible overlap down to a
    single row, so the largest matching overlap wins. Rows are compared with
    ``row_equal(acc_row, next_row, num_cols)``.

    Args:
        acc_rows: Rows accumulated so far (must support ``len`` and indexing).
        next_rows: Rows of the frame about to be appended.
        num_cols: Passed through to ``row_equal`` as its third argument.

    Returns:
        The largest ``k`` such that the last ``k`` rows of ``acc_rows`` match
        the first ``k`` rows of ``next_rows`` pairwise, or 0 if there is no
        such ``k`` (including when either input is empty).
    """
    acc_len = len(acc_rows)
    # k starts at min(len, len), so it can never exceed either sequence and
    # no per-iteration bounds check is needed.
    for k in range(min(acc_len, len(next_rows)), 0, -1):
        tail_start = acc_len - k
        # all() stops at the first mismatching pair, like an explicit break.
        if all(
            row_equal(acc_rows[tail_start + i], next_rows[i], num_cols)
            for i in range(k)
        ):
            return k
    return 0
|
|
|
|
|
|
def dedupe_rows(rows, num_cols=6):
    """Drop rows whose first ``num_cols`` cells repeat an earlier row.

    Two rows count as duplicates when their first ``num_cols`` cells are
    equal after stripping surrounding whitespace; cells missing from a short
    row are treated as empty strings. The first occurrence is kept and the
    original row objects are returned unmodified.

    Args:
        rows: Iterable of rows (sequences of strings).
        num_cols: Number of leading cells that form the comparison key.

    Returns:
        A new list with the surviving rows in their original order.
    """
    unique = []
    fingerprints = set()
    for record in rows:
        cells = []
        for col in range(num_cols):
            cell = record[col] if col < len(record) else ""
            cells.append(cell.strip())
        fingerprint = tuple(cells)
        if fingerprint not in fingerprints:
            fingerprints.add(fingerprint)
            unique.append(record)
    return unique
|
|
|
|
|
|
def sew(frames_dir, out_path, num_cols=6, dedupe=True):
    """Stitch every frame CSV in ``frames_dir`` into one CSV at ``out_path``.

    Frames are processed in the order returned by ``list_frame_csvs``. Each
    later frame is appended after dropping the leading rows that
    ``overlap_length`` reports as already present at the end of the rows
    collected so far. When ``dedupe`` is true the combined rows are also
    passed through ``dedupe_rows`` before being written.

    Args:
        frames_dir: Directory containing the frame CSV files.
        out_path: Path of the combined CSV to write.
        num_cols: Forwarded to ``overlap_length`` and ``dedupe_rows``.
        dedupe: Whether to apply ``dedupe_rows`` to the combined rows.

    Returns:
        The number of rows written, or 0 when no frame CSVs are found (in
        which case nothing is written).
    """
    frame_files = list_frame_csvs(frames_dir)
    if not frame_files:
        return 0

    first_file, *later_files = frame_files
    stitched = read_csv(os.path.join(frames_dir, first_file))
    for frame_file in later_files:
        incoming = read_csv(os.path.join(frames_dir, frame_file))
        shared = overlap_length(stitched, incoming, num_cols)
        # Skip the rows that duplicate the tail of what is already collected.
        stitched += incoming[shared:]

    result = dedupe_rows(stitched, num_cols) if dedupe else stitched
    write_csv(out_path, result)
    return len(result)
|
|
|
|
|
|
def main():
    """Command-line entry point: ``script [frames_dir] [out_path]``.

    ``frames_dir`` defaults to ``"frames"`` and ``out_path`` to
    ``"result.csv"`` when the corresponding argument is omitted. Prints the
    row count returned by ``sew``.
    """
    cli_args = sys.argv[1:]
    frames_dir = cli_args[0] if len(cli_args) >= 1 else "frames"
    out_path = cli_args[1] if len(cli_args) >= 2 else "result.csv"
    n = sew(frames_dir, out_path)
    print(f"Wrote {n} rows to {out_path}")
|
|
|
|
|
|
# Run the command-line entry point only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
|