chore: init
This commit is contained in:
commit
edb31d2d31
13 changed files with 531 additions and 0 deletions
85
sew_csvs.py
Normal file
85
sew_csvs.py
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Sew all frame CSVs in order; deduplicate overlap at boundaries (scroll down)."""
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from row_eq import row_equal
|
||||
|
||||
|
||||
def frame_number_from_name(name):
    """Return the frame number encoded in a ``<digits>.csv`` file name.

    Args:
        name: A bare file name (no directory part), e.g. ``"12.csv"``.

    Returns:
        The digits before ``.csv`` as an int, or None when the whole name is
        not exactly one or more digits followed by ``.csv``.
    """
    # fullmatch instead of match(...$): "$" also matches just before a
    # trailing newline, which would let a name such as "12.csv\n" through.
    found = re.fullmatch(r"(\d+)\.csv", name)
    if found is None:
        return None
    return int(found.group(1))
|
||||
|
||||
|
||||
def list_frame_csvs(frames_dir):
    """Return the frame CSV file names found in frames_dir, in frame order.

    Entries whose name frame_number_from_name() rejects (returns None for)
    are ignored. The remaining names are ordered by their frame number, with
    the name itself breaking ties.

    Args:
        frames_dir: Directory to scan (not recursive).

    Returns:
        A list of bare file names (no directory part).
    """
    numbered = [
        (frame_number_from_name(entry), entry)
        for entry in os.listdir(frames_dir)
    ]
    # Drop non-frame entries before sorting so None never gets compared.
    frames = [pair for pair in numbered if pair[0] is not None]
    frames.sort()
    return [entry for _number, entry in frames]
|
||||
|
||||
|
||||
def read_csv(path):
    """Read the CSV file at path and return its rows.

    The file is opened as UTF-8 with newline="" so the csv module handles
    line endings itself.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A list with one list of strings per parsed row.
    """
    with open(path, newline="", encoding="utf-8") as handle:
        reader = csv.reader(handle)
        return [record for record in reader]
|
||||
|
||||
|
||||
def write_csv(path, rows):
    """Write rows to path as CSV, replacing any existing file.

    The file is opened as UTF-8 with newline="" so the csv module controls
    the line terminator.

    Args:
        path: Destination file path.
        rows: Iterable of rows, each an iterable of cell values.
    """
    with open(path, "w", newline="", encoding="utf-8") as sink:
        writer = csv.writer(sink)
        for record in rows:
            writer.writerow(record)
|
||||
|
||||
|
||||
def overlap_length(acc_rows, next_rows, num_cols=6):
    """Return how many trailing rows of acc_rows repeat at the start of next_rows.

    Candidate lengths are tried from the longest possible down to 1, and the
    first length whose rows all compare equal is returned, so the result is
    the largest suffix/prefix overlap.

    Args:
        acc_rows: Rows accumulated so far.
        next_rows: Rows of the frame about to be appended.
        num_cols: Passed through unchanged to ``row_equal`` (imported from
            ``row_eq``); presumably the number of leading columns compared --
            confirm against that module.

    Returns:
        The overlap length, or 0 when no suffix of acc_rows matches a prefix
        of next_rows (which includes either list being empty).
    """
    # k never exceeds either length, so no per-iteration bounds check is needed.
    longest = min(len(acc_rows), len(next_rows))
    for k in range(longest, 0, -1):
        # acc_rows[tail + i] is the i-th of the last k accumulated rows.
        tail = len(acc_rows) - k
        if all(
            row_equal(acc_rows[tail + i], next_rows[i], num_cols)
            for i in range(k)
        ):
            return k
    return 0
|
||||
|
||||
|
||||
def dedupe_rows(rows, num_cols=6):
    """Drop rows that repeat an earlier row, keeping first occurrences in order.

    Two rows count as the same when their first num_cols cells are equal
    after stripping surrounding whitespace; cells missing from a short row
    are treated as empty strings. Cells beyond num_cols are ignored for the
    comparison but kept in the returned rows.

    Args:
        rows: Iterable of rows (sequences of strings).
        num_cols: Number of leading columns that form the comparison key.

    Returns:
        A new list holding the first row seen for each distinct key.
    """
    first_by_key = {}
    for record in rows:
        width = len(record)
        cells = []
        for col in range(num_cols):
            cell = record[col] if col < width else ""
            cells.append(cell.strip())
        # setdefault keeps the earliest row; dict order is insertion order.
        first_by_key.setdefault(tuple(cells), record)
    return list(first_by_key.values())
|
||||
|
||||
|
||||
def sew(frames_dir, out_path, num_cols=6, dedupe=True):
    """Stitch every frame CSV in frames_dir into a single CSV at out_path.

    Frames are taken in the order given by list_frame_csvs(). Before each
    frame is appended, the rows it shares with the end of the accumulated
    result (as measured by overlap_length()) are skipped. When dedupe is
    true, the stitched rows are additionally passed through dedupe_rows().

    Args:
        frames_dir: Directory containing the frame CSV files.
        out_path: Path of the combined CSV to write.
        num_cols: Column count forwarded to overlap_length() and dedupe_rows().
        dedupe: Whether to run dedupe_rows() over the stitched rows.

    Returns:
        The number of rows written, or 0 when frames_dir holds no frame CSVs
        (in that case no output file is written).
    """
    ordered_names = list_frame_csvs(frames_dir)
    if not ordered_names:
        return 0

    first_name, *later_names = ordered_names
    stitched = read_csv(os.path.join(frames_dir, first_name))
    for frame_name in later_names:
        frame_rows = read_csv(os.path.join(frames_dir, frame_name))
        shared = overlap_length(stitched, frame_rows, num_cols)
        # Only the rows past the shared boundary are new.
        stitched.extend(frame_rows[shared:])

    final_rows = dedupe_rows(stitched, num_cols) if dedupe else stitched
    write_csv(out_path, final_rows)
    return len(final_rows)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: sew frames and report the row count.

    Usage: ``sew_csvs.py [frames_dir] [out_path]``. The first argument
    defaults to "frames" and the second to "result.csv".
    """
    cli_args = sys.argv[1:]
    frames_dir = cli_args[0] if cli_args else "frames"
    out_path = cli_args[1] if len(cli_args) > 1 else "result.csv"
    n = sew(frames_dir, out_path)
    print(f"Wrote {n} rows to {out_path}")
|
||||
|
||||
|
||||
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue