chore: init

This commit is contained in:
Thiago Sposito 2026-02-04 21:11:16 -03:00
commit edb31d2d31
13 changed files with 531 additions and 0 deletions

38
assign_cells.py Normal file
View file

@ -0,0 +1,38 @@
"""Assign word box to column/row by position; merge cell text with heuristics."""
import re
def assign_cell(cx, cy, col_bounds, row_bounds):
    """Locate the column and row whose bounds contain the point (cx, cy).

    ``col_bounds`` and ``row_bounds`` are iterables of ``(low, high)`` pairs.
    Both ends of every pair are inclusive, and when several pairs contain the
    coordinate the first one in iteration order wins.

    Returns a ``(col_idx, row_idx)`` tuple:

    * ``(None, None)`` when no column contains ``cx`` (rows are not searched);
    * ``(col_idx, None)`` when a column matched but no row contains ``cy``;
    * ``(col_idx, row_idx)`` when both matched.
    """
    col_idx = next(
        (pos for pos, (lo, hi) in enumerate(col_bounds) if lo <= cx <= hi),
        None,
    )
    # Without a column there is no cell at all, so skip the row lookup.
    if col_idx is None:
        return None, None

    row_idx = next(
        (pos for pos, (top, bottom) in enumerate(row_bounds) if top <= cy <= bottom),
        None,
    )
    return col_idx, row_idx
def merge_cell_words(words, col_idx):
    """Combine the word strings of one cell into a single string.

    Every word is stripped of surrounding whitespace first, and the joined
    result is stripped again. How the words are joined depends on ``col_idx``:

    * ``1``: words are concatenated with no separator;
    * ``3``, ``4`` or ``5``: words are joined with a single space and every
      comma in the result is replaced by a dot;
    * anything else: words are joined with a single space.
    """
    pieces = [word.strip() for word in words]

    if col_idx == 1:
        return "".join(pieces).strip()

    merged = " ".join(pieces).strip()
    if col_idx in (3, 4, 5):
        # NOTE(review): presumably these are numeric columns using a decimal
        # comma -- confirm against the table layout used by the caller.
        merged = merged.replace(",", ".")
    return merged
def normalize_float(s):
    """Reduce a cell string to its leading numeric-looking prefix.

    Falsy input (``None``, ``""``) is treated as an empty string. The text is
    stripped and every comma is replaced by a dot. If the result is empty,
    ``""`` is returned. Otherwise the longest leading run of digits and dots
    is returned; when the text does not start with a digit or dot (for
    example a leading minus sign or a letter), the whole cleaned text is
    returned unchanged.

    The return value is always a string; no conversion to ``float`` happens
    here and the prefix is not validated as a well-formed number (``"1.2.3"``
    is returned as-is).
    """
    text = (s or "").strip().replace(",", ".")
    if text == "":
        return ""

    leading = re.match(r"^[\d.]+", text)
    if leading is None:
        # Nothing numeric at the start: hand back the cleaned text untouched.
        return text
    return leading.group(0)