45 lines
1.4 KiB
Python
45 lines
1.4 KiB
Python
"""Apply heuristic fixes to a single CSV row (strip, E->-, pipe->space, extensão)."""
|
|
import re
|
|
|
|
|
|
def strip_cell(c):
    """Return the cell text trimmed, with every CR and LF character removed.

    Any falsy input (``None``, ``""``, ...) yields an empty string.
    """
    if not c:
        return ""
    text = c.strip()
    # Drop line breaks that survive inside the cell after trimming the ends.
    for line_break in ("\r", "\n"):
        text = text.replace(line_break, "")
    return text
|
|
|
|
|
|
def fix_cell_value(value, col_idx):
    """Clean one cell and apply the fix-ups tied to its column index.

    The cell is first normalised with ``strip_cell``. Then:

    * column 0: every ``|`` is replaced by a space;
    * column 5: the tokens ``E``, ``e``, ``EX``, ``ES`` and ``EN`` become
      ``-``, and a value made only of digits gets ``.0`` appended.

    Cells in any other column are returned as cleaned by ``strip_cell``.
    """
    cleaned = strip_cell(value)

    if col_idx == 0:
        # replace() is a no-op when the cell holds no pipe.
        return cleaned.replace("|", " ")

    if col_idx != 5:
        return cleaned

    if cleaned in ("E", "e", "EX", "ES", "EN"):
        return "-"
    if cleaned and re.match(r"^\d+$", cleaned):
        return cleaned + ".0"
    return cleaned
|
|
|
|
|
|
def fix_row(row, num_cols=6):
    """Return a new list of exactly ``num_cols`` cells, each fixed by column.

    Cells beyond ``num_cols`` are dropped; positions missing from ``row`` are
    filled with an empty string before being passed to ``fix_cell_value``.
    """
    available = len(row)
    fixed = []
    for idx in range(num_cols):
        raw = row[idx] if idx < available else ""
        fixed.append(fix_cell_value(raw, idx))
    return fixed
|
|
|
|
|
|
def fix_extensao_from_km(row):
    """Reconcile the extension cell (index 5) with the km cells (3 and 4).

    The expected extension is ``km_fin - km_ini`` rounded to two decimals,
    where a blank km cell counts as 0 and decimal commas are accepted.

    * Rows with fewer than 6 cells are returned untouched.
    * If the extension is blank or a placeholder (``-``, ``E``, ``e``,
      ``EX``, ``ES``, ``EN``), it is set to the expected value, or to ``-``
      when both km values are blank/zero.
    * If the extension is numeric and differs from the expected value by
      0.02 or more, it is replaced by the expected value -- but only when
      at least one km value is non-zero, so a valid extension is never
      overwritten with ``0.00`` just because the km cells are empty.
    * If any cell cannot be parsed, the row is returned untouched.

    Returns the original ``row`` object when nothing changes, otherwise a
    new list; the input is never mutated.
    """
    if len(row) < 6:
        return row

    def _km_to_float(cell):
        # Accept decimal commas; a blank cell counts as 0.
        text = str(cell).replace(",", ".").strip()
        return float(text or "0")

    try:
        km_ini = _km_to_float(row[3])
        km_fin = _km_to_float(row[4])
    except (ValueError, TypeError):
        # Unparseable km data: best effort, leave the row alone.
        return row

    ext = str(row[5]).replace(",", ".").strip()
    expected = round(km_fin - km_ini, 2)
    has_km = bool(km_fin or km_ini)

    # Same placeholder tokens that fix_cell_value maps to "-".
    if not ext or ext in ("-", "E", "e", "EX", "ES", "EN"):
        fixed = list(row)
        fixed[5] = f"{expected:.2f}" if has_km else "-"
        return fixed

    if not has_km:
        # No km information to check against: keep the existing extension.
        return row

    if not re.fullmatch(r"[\d.]+", ext):
        return row
    try:
        current = float(ext)
    except ValueError:
        # Digits-and-dots text that is not a number, e.g. "1.2.3" or ".".
        return row

    if abs(current - expected) < 0.02:
        return row

    fixed = list(row)
    fixed[5] = f"{expected:.2f}"
    return fixed
|