#!/usr/bin/env python3
"""Sew all frame CSVs in order; deduplicate overlap at boundaries (scroll down)."""

import csv
import os
import re
import sys

from row_eq import row_equal


def frame_number_from_name(name):
    """Return the frame number encoded in a ``<digits>.csv`` file name.

    Args:
        name: A bare file name (no directory part).

    Returns:
        The leading digits as an ``int``, or ``None`` when the name is not
        exactly one or more digits followed by ``.csv``.
    """
    # fullmatch rather than match + "$": "$" also matches just before a
    # trailing newline, which would let a name like "1.csv\n" slip through.
    m = re.fullmatch(r"(\d+)\.csv", name)
    return int(m.group(1)) if m else None


def list_frame_csvs(frames_dir):
    """Return the frame CSV file names in ``frames_dir`` in numeric order.

    Only names accepted by ``frame_number_from_name`` are kept. Sorting is by
    (frame number, file name), so "2.csv" precedes "10.csv" and names with
    the same number (e.g. "1.csv" / "01.csv") are ordered by name.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. missing directory).
    """
    numbered = []
    for name in os.listdir(frames_dir):
        number = frame_number_from_name(name)
        if number is not None:
            numbered.append((number, name))
    return [name for _, name in sorted(numbered)]


def read_csv(path):
    """Read the UTF-8 CSV file at ``path`` and return its rows as lists of strings."""
    with open(path, newline="", encoding="utf-8") as f:
        return list(csv.reader(f))


def write_csv(path, rows):
    """Write ``rows`` to ``path`` as a UTF-8 CSV file, replacing any existing file."""
    with open(path, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(rows)


def overlap_length(acc_rows, next_rows, num_cols=6):
    """Return the size of the largest overlap between two consecutive frames.

    The overlap is the largest ``k`` such that the last ``k`` rows of
    ``acc_rows`` pairwise match the first ``k`` rows of ``next_rows``.
    Row matching is delegated to ``row_equal`` (imported from ``row_eq``),
    which receives ``num_cols`` as its third argument.

    Args:
        acc_rows: Rows accumulated so far.
        next_rows: Rows of the frame about to be appended.
        num_cols: Passed through to ``row_equal``.

    Returns:
        The overlap length, or 0 when there is no overlap (including when
        either list is empty).
    """
    acc_len = len(acc_rows)
    # Try the longest candidate first so the largest overlap wins.
    for k in range(min(acc_len, len(next_rows)), 0, -1):
        tail_start = acc_len - k
        if all(
            row_equal(acc_rows[tail_start + i], next_rows[i], num_cols)
            for i in range(k)
        ):
            return k
    return 0


def dedupe_rows(rows, num_cols=6):
    """Drop rows whose first ``num_cols`` cells duplicate an earlier row.

    Cells are compared after stripping surrounding whitespace; a row shorter
    than ``num_cols`` is treated as if padded with empty strings. The first
    occurrence of each key is kept, and the original order is preserved.

    NOTE(review): this uses its own stripped-cell key rather than
    ``row_equal``; confirm the two notions of equality are meant to agree.
    """
    seen = set()
    out = []
    for row in rows:
        key = tuple(
            (row[i] if i < len(row) else "").strip() for i in range(num_cols)
        )
        if key in seen:
            continue
        seen.add(key)
        out.append(row)
    return out


def sew(frames_dir, out_path, num_cols=6, dedupe=True):
    """Concatenate all frame CSVs in ``frames_dir`` into a single CSV.

    Frames are processed in the order given by ``list_frame_csvs``. For each
    subsequent frame, the rows that overlap with the end of the accumulated
    rows (see ``overlap_length``) are skipped before appending.

    Args:
        frames_dir: Directory containing ``<number>.csv`` frame files.
        out_path: Destination CSV path.
        num_cols: Number of leading columns used when comparing rows.
        dedupe: When true, additionally remove duplicate rows across the
            whole result with ``dedupe_rows``.

    Returns:
        The number of rows written. When no frame CSVs are found, returns 0
        and does NOT create or modify ``out_path``.
    """
    csv_names = list_frame_csvs(frames_dir)
    if not csv_names:
        return 0
    acc = read_csv(os.path.join(frames_dir, csv_names[0]))
    for name in csv_names[1:]:
        next_rows = read_csv(os.path.join(frames_dir, name))
        k = overlap_length(acc, next_rows, num_cols)
        acc.extend(next_rows[k:])
    if dedupe:
        acc = dedupe_rows(acc, num_cols)
    write_csv(out_path, acc)
    return len(acc)


def main():
    """Command-line entry point: ``script [frames_dir] [out_path]``.

    Defaults are ``frames`` for the input directory and ``result.csv`` for
    the output file.
    """
    frames_dir = sys.argv[1] if len(sys.argv) > 1 else "frames"
    out_path = sys.argv[2] if len(sys.argv) > 2 else "result.csv"
    # sew() writes nothing when there are no frames; say so instead of
    # claiming that rows were written to out_path.
    if not list_frame_csvs(frames_dir):
        print(f"No frame CSVs found in {frames_dir}; nothing written", file=sys.stderr)
        return
    n = sew(frames_dir, out_path)
    print(f"Wrote {n} rows to {out_path}")


if __name__ == "__main__":
    main()