chore: init
This commit is contained in:
commit
edb31d2d31
13 changed files with 531 additions and 0 deletions
38
assign_cells.py
Normal file
38
assign_cells.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
"""Assign word box to column/row by position; merge cell text with heuristics."""
|
||||
import re
|
||||
|
||||
|
||||
def assign_cell(cx, cy, col_bounds, row_bounds):
    """Locate the grid cell that contains the point ``(cx, cy)``.

    Args:
        cx: Horizontal coordinate, compared against ``col_bounds``.
        cy: Vertical coordinate, compared against ``row_bounds``.
        col_bounds: Iterable of ``(left, right)`` pairs, one per column.
        row_bounds: Iterable of ``(y0, y1)`` pairs, one per row.

    Returns:
        A ``(col_idx, row_idx)`` tuple of positions within the respective
        bounds sequences. Both ends of every interval are inclusive, and
        when several intervals contain the coordinate the first one wins.
        ``(None, None)`` is returned when no column contains ``cx``;
        ``(col_idx, None)`` when a column matches but no row contains ``cy``.
    """

    def _first_containing(value, intervals):
        # Index of the first closed interval holding `value`, or None.
        return next(
            (
                position
                for position, (low, high) in enumerate(intervals)
                if low <= value <= high
            ),
            None,
        )

    column = _first_containing(cx, col_bounds)
    if column is None:
        # Without a column the row bounds are never consulted.
        return None, None
    return column, _first_containing(cy, row_bounds)
|
||||
|
||||
|
||||
def merge_cell_words(words, col_idx):
    """Merge the word strings of one cell into a single text value.

    Every word is stripped of surrounding whitespace, and words that are
    empty after stripping are dropped so they cannot introduce doubled
    spaces into the merged text.

    Args:
        words: Iterable of strings belonging to the cell.
        col_idx: Index of the cell's column; selects the merge rule.

    Returns:
        The merged string:

        * column 1: words concatenated with no separator;
        * columns 3, 4 and 5: words joined by single spaces, then every
          ``","`` replaced with ``"."`` (presumably decimal-comma numeric
          columns -- confirm against the table layout);
        * any other column: words joined by single spaces.
    """
    # Filter blanks up front: an empty token in a " ".join would otherwise
    # leave two adjacent spaces in the result.
    parts = [text for text in (w.strip() for w in words) if text]
    if col_idx == 1:
        return "".join(parts)
    merged = " ".join(parts)
    if col_idx in (3, 4, 5):
        merged = merged.replace(",", ".")
    return merged
|
||||
|
||||
|
||||
def normalize_float(s):
    """Reduce a cell string to its leading numeric prefix.

    The input is stripped, commas are replaced with dots, and the longest
    leading run of digits and dots (optionally preceded by a ``+`` or ``-``
    sign) is returned. Accepting the sign keeps signed values consistent
    with unsigned ones: ``"-5,2 kg"`` yields ``"-5.2"`` just as ``"5,2 kg"``
    yields ``"5.2"``.

    Args:
        s: The raw string, or a falsy value such as ``None`` or ``""``.

    Returns:
        ``""`` when the input is falsy or blank; the numeric prefix when
        the cleaned text starts with one; otherwise the cleaned text
        unchanged. The result is a string and is not validated as a
        parseable float (for example ``"1.2.3"`` is returned as is).
    """
    text = (s or "").strip().replace(",", ".")
    if not text:
        return ""
    # At least one digit or dot must follow the optional sign, so a lone
    # "-" does not match and is returned unchanged.
    m = re.match(r"[+-]?[\d.]+", text)
    return m.group(0) if m else text
|
||||
Loading…
Add table
Add a link
Reference in a new issue