"""Assign word box to column/row by position; merge cell text with heuristics.""" import re def assign_cell(cx, cy, col_bounds, row_bounds): col_idx = None for i, (left, right) in enumerate(col_bounds): if left <= cx <= right: col_idx = i break if col_idx is None: return None, None row_idx = None for i, (y0, y1) in enumerate(row_bounds): if y0 <= cy <= y1: row_idx = i break if row_idx is None: return col_idx, None return col_idx, row_idx def merge_cell_words(words, col_idx): if col_idx == 1: return "".join(w.strip() for w in words).strip() if col_idx in (3, 4, 5): s = " ".join(w.strip() for w in words).strip() s = s.replace(",", ".") return s return " ".join(w.strip() for w in words).strip() def normalize_float(s): s = (s or "").strip().replace(",", ".") if not s: return "" m = re.match(r"^[\d.]+", s) return m.group(0) if m else s