38 lines
1 KiB
Python
38 lines
1 KiB
Python
"""Assign word box to column/row by position; merge cell text with heuristics."""
|
|
import re
|
|
|
|
|
|
def assign_cell(cx, cy, col_bounds, row_bounds):
    """Locate the (column, row) cell whose bounds contain the point (cx, cy).

    Args:
        cx: Horizontal coordinate of the point to place.
        cy: Vertical coordinate of the point to place.
        col_bounds: Iterable of ``(left, right)`` pairs, one per column.
        row_bounds: Iterable of ``(y0, y1)`` pairs, one per row.

    Returns:
        A ``(col_idx, row_idx)`` tuple of positions into ``col_bounds`` and
        ``row_bounds``. Both ends of every interval are inclusive, and the
        first interval that contains the coordinate wins. If no column
        contains ``cx`` the result is ``(None, None)`` and the rows are not
        examined; if a column matches but no row contains ``cy`` the result
        is ``(col_idx, None)``.
    """
    # Lazy generators stop at the first hit, just like a loop with ``break``.
    col_idx = next(
        (
            pos
            for pos, (left, right) in enumerate(col_bounds)
            if left <= cx <= right
        ),
        None,
    )
    if col_idx is None:
        # Without a column the row lookup is skipped entirely.
        return None, None

    row_idx = next(
        (
            pos
            for pos, (top, bottom) in enumerate(row_bounds)
            if top <= cy <= bottom
        ),
        None,
    )
    return col_idx, row_idx
|
|
|
|
|
|
def merge_cell_words(words, col_idx, *, joined_cols=(1,), decimal_cols=(3, 4, 5)):
    """Merge the word strings of one cell into a single text value.

    Each word is stripped of surrounding whitespace and words that are empty
    after stripping are dropped, so blank tokens cannot introduce doubled
    spaces in the merged text.

    Args:
        words: Iterable of word strings belonging to the cell. ``None`` or an
            empty iterable yields ``""``.
        col_idx: Index of the column the cell belongs to; selects the merge
            rule.
        joined_cols: Column indices whose words are concatenated with no
            separator. Defaults to ``(1,)``.
        decimal_cols: Column indices whose merged text has every ``","``
            replaced by ``"."``. Defaults to ``(3, 4, 5)``.

    Returns:
        The merged string. Columns in ``joined_cols`` are concatenated
        directly; all other columns are joined with single spaces, and
        columns in ``decimal_cols`` additionally get commas turned into dots.
        ``joined_cols`` takes precedence when an index appears in both.
    """
    tokens = [token for token in (word.strip() for word in (words or ())) if token]

    if col_idx in joined_cols:
        return "".join(tokens)

    text = " ".join(tokens)
    if col_idx in decimal_cols:
        # Decimal comma -> decimal point for these columns.
        text = text.replace(",", ".")
    return text
|
|
|
|
|
|
# Leading numeric run: optional sign followed by digits and/or dots.
_LEADING_NUMBER_RE = re.compile(r"[+-]?[\d.]+")


def normalize_float(s):
    """Return the leading numeric portion of ``s`` with ``,`` turned into ``.``.

    Args:
        s: Text to normalize; ``None`` and other falsy values are treated as
            an empty string.

    Returns:
        ``""`` when the input is empty after stripping. Otherwise the longest
        leading run made of an optional ``+``/``-`` sign followed by digits
        and dots, taken after every comma has been replaced by a dot (so
        trailing text such as a unit is dropped). When the text does not
        start with such a run, the stripped, comma-replaced text is returned
        unchanged.

    Note:
        The result is not validated as a float literal: a run with several
        dots (e.g. ``"1.2.3"``) is returned as is.
    """
    text = (s or "").strip().replace(",", ".")
    if not text:
        return ""
    found = _LEADING_NUMBER_RE.match(text)
    return found.group(0) if found else text
|