chore: init

This commit is contained in:
Thiago Sposito 2026-02-04 21:11:16 -03:00
commit edb31d2d31
13 changed files with 531 additions and 0 deletions

45
clean_csv_heuristics.py Normal file
View file

@ -0,0 +1,45 @@
"""Apply heuristic fixes to a single CSV row (strip, E->-, pipe->space, extensão)."""
import re
def strip_cell(c):
    """Return the cell text trimmed of outer whitespace, with CR/LF removed.

    Falsy input (``None``, ``""``, ``0``) is treated as an empty cell and
    yields ``""``. Carriage returns and line feeds embedded inside the
    value are dropped, not replaced with a space.
    """
    text = c if c else ""
    trimmed = text.strip()
    # Outer whitespace is already gone; only embedded line breaks remain.
    return "".join(ch for ch in trimmed if ch not in "\r\n")
def fix_cell_value(value, col_idx):
    """Clean one cell, applying the heuristics tied to its column index.

    Every cell is first normalised with ``strip_cell``. After that:

    * column 0: pipe characters are replaced by spaces;
    * column 5: the placeholder tokens ``E``, ``e``, ``EX``, ``ES`` and
      ``EN`` become ``"-"``, and an all-digit value gets ``".0"`` appended.

    Cells in any other column are returned as normalised by ``strip_cell``.
    """
    cleaned = strip_cell(value)

    if col_idx == 0:
        # replace() is a no-op when there is no pipe in the text.
        return cleaned.replace("|", " ")

    if col_idx != 5:
        return cleaned

    if cleaned in ("E", "e", "EX", "ES", "EN"):
        return "-"

    if cleaned and re.match(r"^\d+$", cleaned):
        return cleaned + ".0"

    return cleaned
def fix_row(row, num_cols=6):
    """Return a new list of exactly ``num_cols`` cleaned cells.

    Each position is cleaned with ``fix_cell_value``. Positions missing
    from a short ``row`` are treated as empty cells; cells at or beyond
    ``num_cols`` are dropped.
    """
    available = len(row)
    cleaned_cells = []
    for idx in range(num_cols):
        raw = row[idx] if idx < available else ""
        cleaned_cells.append(fix_cell_value(raw, idx))
    return cleaned_cells
def _parse_decimal(cell):
    """Parse *cell* as a float, accepting a decimal comma; a blank cell is 0.0.

    Raises:
        ValueError: If the cell is non-blank and not a valid float literal.
    """
    text = str(cell).replace(",", ".").strip()
    return float(text) if text else 0.0


def fix_extensao_from_km(row):
    """Reconcile the extension cell (index 5) with the km cells (3 and 4).

    The expected extension is ``row[4] - row[3]`` rounded to two decimals
    (decimal commas are accepted). The extension cell is then handled as
    follows:

    * blank or a placeholder (``-``, ``E``, ``e``, ``EX``, ``ES``, ``EN``):
      replaced by the expected value, or by ``"-"`` when both km cells are
      blank/zero;
    * numeric and within 0.02 of the expected value: left untouched;
    * numeric and further away: replaced by the expected value, but only
      when at least one km cell is non-zero. With no km information the
      existing extension is kept instead of being overwritten by ``0.00``;
    * anything else (unparseable km or extension): left untouched.

    Args:
        row: Sequence of cell values; rows shorter than 6 are returned as is.

    Returns:
        The same ``row`` object when nothing changes, otherwise a new list
        with index 5 replaced. The input sequence is never mutated.
    """
    if len(row) < 6:
        return row

    try:
        km_ini = _parse_decimal(row[3])
        km_fin = _parse_decimal(row[4])
    except ValueError:
        # Unparseable km cell: there is nothing trustworthy to compute from.
        return row

    has_km = bool(km_fin or km_ini)
    expected = round(km_fin - km_ini, 2)
    ext = str(row[5]).replace(",", ".").strip()

    # Same placeholder tokens that fix_cell_value maps to "-" in column 5,
    # so rows that skipped that step are still recognised here.
    if not ext or ext in ("-", "E", "e", "EX", "ES", "EN"):
        fixed = list(row)
        fixed[5] = f"{expected:.2f}" if has_km else "-"
        return fixed

    # Without km data the computed value would be a meaningless 0.00, so an
    # existing extension is kept; non-numeric text is never touched either.
    if not has_km or not re.fullmatch(r"[\d.]+", ext):
        return row

    try:
        current = float(ext)
    except ValueError:
        # Digits and dots only, yet not a number (e.g. "1.2.3").
        return row

    if abs(current - expected) < 0.02:
        return row

    fixed = list(row)
    fixed[5] = f"{expected:.2f}"
    return fixed