#!/usr/bin/env python3
"""
bidfood_to_json.py  v2
Converts the most recent Bidfood JSON export into FM-ready JSON.
Cron: 45 5 * * * python3 /var/www/html/bidfood/downloads/bidfood_to_json.py >> /var/log/bidfood_convert.log 2>&1
"""

import json, gzip, logging, os, re, shutil, sys, time
from datetime import datetime, timezone

# Source file that convert() reads and parses as JSON.
INPUT_PATH  = "/var/www/html/bidfood/downloads/latest.json"
# Destination of the converted JSON; convert() also writes a gzip copy at this path + ".gz".
OUTPUT_PATH = "/var/www/html/bidfood/downloads/latest_fm.json"
# Log file handed to logging.basicConfig() below.
LOG_PATH    = "/var/log/bidfood_convert.log"

# Maps a Bidfood web-hierarchy family name to a (group, subgroup) pair of SYR codes.
# convert() looks up the NL family name returned by parse_bilingual(); a family that is
# missing here, or mapped to ("", ""), ends up with empty codes and is logged as unmapped.
# A few keys are raw bilingual strings ("...-FR:...-NL:..."): parse_bilingual() returns its
# input unchanged when it cannot extract the requested language part, so such raw strings
# can reach this lookup.
# NOTE(review): several keys look like spelling variants of one another (e.g. the
# "Groenteconserven" entries) — presumably they mirror spellings found in the upstream
# feed; confirm before "correcting" any key, since lookups are exact string matches.
CATEGORY_MAP = {
    "030-FR:Sauces chaudes prête à l'emploi-NL:Klant-en-klare hete sauzen":("32.5","36.2"),
    "-FR:Sauces chaudes prête à l'emploi-NL:Klant-en-klare hete sauzen":("32.5","36.2"),
    "-FR:Chapelure-NL:Broodkruimel":("32.5","36.2"),
    "-FR:Gratins et pommes de terre préparée-NL:Gratins en bereiden aardappelen":("32.4","36.11"),
    "Aardappelproducten":("32.5","36.2"),
    "Antipasti en gegrilde groente":("32.5","36.2"),
    "Aperitiefkoeken":("32.5","36.2"),
    "Aromatische kruiden en andere":("32.5","36.2"),
    "Azijn":("32.5","36.2"),
    "Bindmiddelen en verdikkingsmiddelen":("32.5","36.2"),
    "Bloem":("32.5","36.2"),
    "Bouillons en bases voor bouillons":("32.5","36.2"),
    "Chips":("32.5","36.2"),
    "Chocolade":("32.5","36.2"),
    "Coating hocolade en décoraties":("32.5","36.2"),
    "Coatinghocolade en décoraties":("32.5","36.2"),
    "Confituur":("32.5","36.2"),
    "Dressing en vinaigrette":("32.5","36.2"),
    "Fonds, fumets en jus":("32.5","36.2"),
    "Gedehydrateerde bases en soepen":("32.5","36.2"),
    "Gedroogde groenten":("32.5","36.2"),
    "Gedroogde hete sauzen":("32.5","36.2"),
    "Gedroogde vruchten en gekonfijte vruchte":("32.5","36.2"),
    "Gedroogde vruchten, noten en zaden":("32.5","36.2"),
    "Gemengde bereidingen":("32.5","36.2"),
    "Gist":("32.5","36.2"),
    "Granen":("32.5","36.2"),
    "Groenteconserven":("32.5","36.11"),
    "Groenten conserven":("32.5","36.11"),
    "Groentconserven":("32.5","36.11"),
    "Honing":("32.5","36.2"),
    "Ijsblokje":("32.1","36.5"),
    "Koekjes en zandkoekjes":("32.5","36.2"),
    "Konfijten, tapenades en pesto":("32.5","36.2"),
    "Korst":("32.4","36.6"),
    "Koude sauzen":("32.5","36.2"),
    "Kruiden":("32.5","36.2"),
    "Marinade":("32.5","36.2"),
    "Olië":("32.5","36.2"),
    "Ontbijtgranen":("32.5","36.2"),
    "Pasta":("32.5","36.2"),
    "Pinda's en pistachenoten":("32.5","36.2"),
    "Purees":("32.5","36.2"),
    "Rijst":("32.5","36.2"),
    "Roux":("32.5","36.2"),
    "Smeerpasta":("32.5","36.2"),
    "Snack mix":("32.5","36.2"),
    "Snoep":("32.5","36.2"),
    "Specerij":("32.5","36.2"),
    "Suiker en zoetstof":("32.5","36.2"),
    "Toastjes, broodstengels en blini's":("32.5","36.2"),
    "Toppings & Sauzen":("32.5","36.2"),
    "Vet":("32.5","36.2"),
    "Visblikjes":("32.5","36.17"),
    "Vruchten conserven":("32.5","36.2"),
    "Vruchtenbereidingen":("32.5","36.2"),
    "Wereldkruiden":("32.5","36.2"),
    "Zout":("32.5","36.2"),
    "Alternatief voor melkproducten":("32.3","36.19"),
    "Alternatief voor vlees":("32.3","36.8"),
    "Alternatief voor vleeswaren":("32.3","36.7"),
    "Bereide salade":("32.3","36.16"),
    "Blauwaderkazen":("32.3","36.19"),
    "Boter":("32.3","36.19"),
    "Brood":("32.3","36.6"),
    "Bun, hotdogbroodjes en wraps":("32.3","36.6"),
    "Eieren":("32.3","36.19"),
    "Gemarineerde vis":("32.3","36.17"),
    "Geperste kazen":("32.3","36.19"),
    "Geraspte kazen":("32.3","36.19"),
    "Ham gekookt":("32.3","36.7"),
    "Ham gerookt":("32.3","36.7"),
    "Hapje en bodem":("32.3","36.16"),
    "Harde kazen":("32.3","36.19"),
    "Heel":("32.3","36.8"),
    "Kaassnack":("32.3","36.19"),
    "Kip":("32.3","36.23"),
    "Melk":("32.3","36.19"),
    "Room":("32.3","36.19"),
    "Rug en filet":("32.3","36.17"),
    "Rundvlees":("32.3","36.8"),
    "Saladespreads":("32.3","36.16"),
    "Schaaldieren":("32.3","36.10"),
    "Smeltkazen":("32.3","36.19"),
    "Spek en speklapje":("32.3","36.7"),
    "Varkensvlees":("32.3","36.8"),
    "Verse kazen":("32.3","36.19"),
    "Yoghurt natuur":("32.3","36.19"),
    "Alternatief voor desserts":("32.4","36.16"),
    "Bak en schaal":("32.4","36.16"),
    "Cake en moelleux":("32.4","36.16"),
    "Chocoladebroodje":("32.4","36.6"),
    "Croissant":("32.4","36.6"),
    "Deegstuk":("32.4","36.6"),
    "Diepvries groenten":("32.4","36.11"),
    "Diepvries vruchten":("32.4","36.11"),
    "Diepvriesdesserts":("32.4","36.16"),
    "Frieten":("32.4","36.11"),
    "Gevogelte snack":("32.4","36.23"),
    "Halve stokbroden":("32.4","36.6"),
    "Individuele desserts":("32.4","36.16"),
    "Lasagne en gevulde pasta":("32.4","36.16"),
    "Minisdesserts":("32.4","36.16"),
    "Piccolo, sandiwch, speciale broden":("32.4","36.6"),
    "Pizza's, quiches en hartige taartjes":("32.4","36.16"),
    "Vissnack":("32.4","36.17"),
    "Vleessnack":("32.4","36.8"),
    "Wafels":("32.4","36.6"),
    "Zuiveldesserts":("32.4","36.16"),
    "Aperitieven en sterke dranken":("32.1","36.3"),
    "Sap en nectar":("32.1","36.5"),
    "Sirop":("32.1","36.5"),
    "Thee":("32.1","36.4"),
    "Warme dranken":("32.1","36.4"),
    "Wijn":("32.1","36.22"),
    "Dienbladen en presentatie":("32.2","36.14"),
    "Drogisterij":("32.6","36.15"),
    "Facility and administration":("32.6","36.15"),
    "Feesten en activiteiten":("32.6","36.15"),
    "Folie, aluminiumfolie en andere":("32.6","36.15"),
    "Kaars":("32.6","36.15"),
    "Klein materiaal":("32.6","36.15"),
    "Kookmateriaal":("32.6","36.15"),
    "Lichaamshygiëne":("32.6","36.15"),
    "Pizzamaterialen":("32.6","36.15"),
    "Ronde schalen, potten en kommen":("32.2","36.14"),
    "Schalen en bakjes":("32.2","36.14"),
    "Servies":("32.2","36.14"),
    "Tassen en zakjes":("32.6","36.15"),
    "Veiligheid":("32.6","36.15"),
    "Zaalmaterialen":("32.6","36.15"),
    "Broodkruimel":("32.5","36.2"),
    "Gratins en bereiden aardappelen":("32.4","36.11"),
    "Andere":("",""),
    "Assortiment":("",""),
    "Oosterse snacks":("",""),
    "Verwerkte producten":("",""),
}

# SYR unit codes (value list 2).
# Translates a value-list-38 code ("38.x") into a value-list-2 code ("2.x");
# convert() uses it to derive "uniteArticle" from the parsed content unit.
UNIT_38_TO_2 = {
    "38.1": "2.21",
    "38.2": "2.12",
    "38.3": "2.13",
    "38.4": "2.10",
    "38.5": "2.28",
    "38.6": "2.26",
    "38.7": "2.31",
    "38.8": "2.27",
}

# Bidfood order-unit code (minOrderUnit / maxOrderUnit) -> value-list-2 code.
UNIT_MAP = {
    "PCE":"2.13",  # piece
    "PAQ":"2.4",   # pack
    "SAC":"2.16",  # bag
    "BOI":"2.22",  # can
    "BT": "2.10",  # bottle
    "TRA":"2.29",  # tray
    "CRN":"2.2",   # carton
    "SEA":"2.18",  # bucket
}

# SYR content-measure codes (value list 38), keyed by short Dutch unit name.
UNIT_NL_MAP = {
    "st":  "38.3",  # piece
    "kg":  "38.2",  # kg
    "gr":  "38.5",  # gram
    "ml":  "38.7",  # ml
    "cl":  "38.6",  # cl
    "lt":  "38.1",  # litre
    "portie": "38.8",
}
import urllib.request

def load_syr_codes(url="https://ai-pdf.seeyouresto.com/bidfood/syr_codes.json", timeout=10):
    """Fetch the SYR code tables as a dict, or return ``None`` when unavailable.

    Args:
        url: Location of the JSON document; defaults to the production URL.
        timeout: Timeout in seconds passed to ``urllib.request.urlopen``.

    Returns:
        The decoded JSON object when it is a dict. ``None`` when the download
        or decoding fails, or when the document is not a JSON object; a
        warning is logged in both cases.
    """
    # Looked up here rather than through the module-level ``log`` name so the
    # function does not depend on where in the file that name gets bound.
    logger = logging.getLogger(__name__)
    try:
        with urllib.request.urlopen(url, timeout=timeout) as r:
            codes = json.loads(r.read())
    except Exception as e:
        # Deliberate best effort: any network/decoding problem must not abort
        # the conversion, the caller continues without the remote tables.
        logger.warning(f"syr_codes.json niet beschikbaar: {e} — hardcoded waarden gebruikt")
        return None
    if not isinstance(codes, dict):
        # Callers use ``.get`` on the result; anything but an object is unusable.
        logger.warning(
            f"syr_codes.json heeft onverwacht formaat ({type(codes).__name__}) — hardcoded waarden gebruikt"
        )
        return None
    return codes
# Module-wide logging: records of level INFO and above go to LOG_PATH, one
# timestamped line per record. `log` is the logger used throughout this file.
logging.basicConfig(filename=LOG_PATH, level=logging.INFO,
    format="%(asctime)s  %(levelname)s  %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
log = logging.getLogger(__name__)

def parse_bilingual(s, lang="NL"):
    """Extract one language part from a "-FR:...-NL:..." style string.

    Args:
        s: The raw string; may be empty or ``None``.
        lang: Language tag to look for (the text after ``-<lang>:``).

    Returns:
        ``""`` for a falsy ``s``. Otherwise the text following ``-<lang>:``
        up to the next ``-XX:`` marker (two capitals) or the end of the
        string, with surrounding whitespace removed. When no such part is
        found the input is returned unchanged. The captured text cannot
        contain ``-``, so a value such as "Klant-en-klare hete sauzen" makes
        the pattern fail and the whole input comes back as-is.
    """
    if not s:
        return ""
    found = re.search(rf"-{lang}:([^-]*)(?:-[A-Z]{{2}}:|$)", s)
    if found is None:
        return s
    return found.group(1).strip()

# Unit spellings accepted after the quantity (matched case-insensitively).
_UNIT_ALTERNATIVES = r"stuks?|pièce|kg|gr|g|ml|cl|l"
# "NxQQQunit": multipack, the captured quantity is the content per piece (after the x).
_MULTIPACK_UNIT_RE = re.compile(
    rf"^[0-9]+x([0-9][0-9.,]*)\s*({_UNIT_ALTERNATIVES})$", re.IGNORECASE
)
# "QQQunit": a single quantity followed by a unit.
_SINGLE_UNIT_RE = re.compile(
    rf"^([0-9][0-9.,]*)\s*({_UNIT_ALTERNATIVES})$", re.IGNORECASE
)
# Lower-cased unit spelling -> lookup key for the value-list-2 table.
_UNIT_KEY_2 = {"stuks": "stuk", "pièce": "stuk", "g": "gram", "gr": "gram", "l": "lt"}
# Lower-cased unit spelling -> lookup key for the value-list-38 table.
_UNIT_KEY_38 = {"stuks": "st", "stuk": "st", "pièce": "st", "g": "gr", "l": "lt"}


def parse_unit_nl(s, unit_nl_to_2=None, unit_nl_to_38=None):
    """Parse an order-unit description such as "6x33cl" or "1,5 L".

    Args:
        s: The description text; falsy or non-string values yield empty results.
        unit_nl_to_2: Mapping from normalised unit name ("stuk", "kg", "gram",
            "ml", "cl", "lt") to a value-list-2 code. ``None`` means empty.
        unit_nl_to_38: Mapping from normalised unit name ("st", "kg", "gr",
            "ml", "cl", "lt") to a value-list-38 code. ``None`` means empty.

    Returns:
        A tuple ``(qty, code_2, code_38)`` of strings. ``qty`` uses "." as the
        decimal separator; for the multipack form "NxQunit" it is the quantity
        after the "x". A code is ``""`` when its table has no entry for the
        unit. Descriptions that match neither form (including combined ones
        containing "+" or "/") give ``("", "", "")``.
    """
    if unit_nl_to_2 is None:
        unit_nl_to_2 = {}
    if unit_nl_to_38 is None:
        unit_nl_to_38 = {}
    if not s or not isinstance(s, str):
        return "", "", ""

    text = s.strip()
    # Try the multipack form first so "6x33cl" yields 33, not a failed match.
    match = _MULTIPACK_UNIT_RE.match(text) or _SINGLE_UNIT_RE.match(text)
    if match is None:
        return "", "", ""

    qty = match.group(1).replace(",", ".")
    unit = match.group(2).lower()
    key_2 = _UNIT_KEY_2.get(unit, unit)
    key_38 = _UNIT_KEY_38.get(unit, unit)
    return qty, unit_nl_to_2.get(key_2, ""), unit_nl_to_38.get(key_38, "")

def _first_dict(record, key):
    """Return the first element of the list under ``key``, or ``{}`` if unusable."""
    items = record.get(key)
    if isinstance(items, list) and items and isinstance(items[0], dict):
        return items[0]
    return {}


def _clean_coefficient(raw):
    """Normalise a coefficient such as '10,000000' to '10'; '' when unparsable."""
    if not raw:
        return raw
    try:
        return str(int(float(str(raw).replace(",", "."))))
    except (ValueError, OverflowError):
        return ""


def _clean_vat(raw):
    """Return the VAT rate as a whole-number string; the raw value when unparsable."""
    if not raw:
        return ""
    try:
        return str(int(float(raw)))
    except (TypeError, ValueError, OverflowError):
        return raw


def _build_article(p, unit_nl_to_2, unit_nl_to_38):
    """Build one output article dict from a single Bidfood product record."""
    gi = _first_dict(p, "generalInformation")
    li = _first_dict(p, "logisticInformation")
    pr = _first_dict(p, "prices")
    wh = _first_dict(p, "webHierarchy")

    name_nl      = gi.get("nameNL", "")
    availability = gi.get("availability", "")

    min_unit    = li.get("minOrderUnit", "")
    min_unit_nl = li.get("minOrderUnitDescriptionNL", "")
    max_unit    = li.get("maxOrderUnit", "")
    coefficient = _clean_coefficient(li.get("coefficientUnitMinAndMax", ""))

    # Quantity and unit codes are parsed from the NL description of the minimum order unit.
    qty, cont_emb_2, cont_emb_38 = parse_unit_nl(min_unit_nl, unit_nl_to_2, unit_nl_to_38)

    # The price may arrive as a string or a number; output uses a decimal comma.
    price_raw = pr.get("price", "")
    price = "" if price_raw is None else str(price_raw).replace(".", ",")

    family_raw = wh.get("family", "")
    subfam_raw = wh.get("subFamily", "")
    family_nl  = parse_bilingual(family_raw, "NL")
    syr_groep, syr_subgroep = CATEGORY_MAP.get(family_nl, ("", ""))

    return {
        # Identification
        "erpNumber":       gi.get("erpNumber", ""),
        "nameNL":          name_nl,
        "nameFR":          gi.get("nameFR", "") or name_nl,
        "brand":           gi.get("brand", ""),
        # Status
        "availability":    availability,
        "isActive":        1 if availability == "Available" else 0,
        # Packaging
        "embAchat":        UNIT_MAP.get(min_unit, ""),          # Eart_EmbAchat (always the min order unit)
        "qEmbAchat":       qty,                                 # Eart_QEmbAchat (content per package)
        "contUniteArt":    cont_emb_38,                         # Eart_ContUniteArticle (unit 38.*)
        "contEmbAchat":    cont_emb_2,                          # Eart_ContEmbAchat (unit 2.*)
        "orderInfoNL":     min_unit_nl,                         # Eart_order_info_nl
        "minUnitEAN":      li.get("minOrderUnitEAN", ""),       # ECONOMAT_barcodes (piece)
        "maxUnitEAN":      li.get("maxOrderUnitEAN", ""),       # ECONOMAT_barcodes (carton)
        "minBestel":       coefficient if max_unit else "",     # Eart_min_Bestelhoeveelheid (only with a max unit)
        "BF_UOMCode":      min_unit,                            # Bidfood UOMCode used for ordering
        "uniteArticle":    UNIT_38_TO_2.get(cont_emb_38, ""),   # Eart_UniteArticle (38 -> 2 mapping)
        # Price & VAT
        "price":           price,
        "VAT":             _clean_vat(pr.get("VAT", "")),       # Eart_BTWinkoop_ID
        # Categorisation
        "familyNL":        family_nl,
        "familyFR":        parse_bilingual(family_raw, "FR"),
        "subFamilyNL":     parse_bilingual(subfam_raw, "NL"),
        "subFamilyFR":     parse_bilingual(subfam_raw, "FR"),
        "syr_groep":       syr_groep,                           # Eart_Groupe
        "syr_subgroep":    syr_subgroep,                        # Eart_SGroupe
    }


def convert():
    """Convert the Bidfood export at INPUT_PATH into OUTPUT_PATH (+ ``.gz`` copy).

    Loads the SYR code tables via ``load_syr_codes()``, reads the products
    under the ``"BidfoodCatalogs"`` key, builds one article dict per product
    that has ``generalInformation``, and writes the result through a temporary
    file that is moved into place with ``os.replace``. Mapping statistics and
    unmapped families are logged, and a summary is printed.

    Exits the process with status 1 when INPUT_PATH does not exist.
    """
    t0 = time.time()
    log.info("=== Bidfood conversie gestart ===")

    syr_codes = load_syr_codes()
    if not isinstance(syr_codes, dict):
        syr_codes = {}
    unit_nl_to_2 = syr_codes.get("unit_nl_to_2", {})
    # Without a remote table, fall back to the hardcoded value-list-38 codes;
    # UNIT_NL_MAP uses the same keys ("st", "kg", "gr", ...) that parse_unit_nl() looks up.
    # NOTE(review): there is no hardcoded table for the 2.* codes keyed by NL unit
    # name, so "contEmbAchat" stays empty in that case — confirm whether it should
    # be derived through UNIT_38_TO_2.
    unit_nl_to_38 = syr_codes.get("unit_nl_to_38", UNIT_NL_MAP)

    if not os.path.exists(INPUT_PATH):
        log.error(f"Input niet gevonden: {INPUT_PATH}")
        sys.exit(1)

    with open(INPUT_PATH, encoding="utf-8") as f:
        data = json.load(f)

    products = data.get("BidfoodCatalogs") or []
    total = len(products)
    log.info(f"Producten: {total}")

    articles = []
    unmapped = {}
    stats = {"volledig": 0, "enkel_groep": 0, "geen": 0}
    skipped = 0

    for p in products:
        if not p.get("generalInformation"):
            skipped += 1
            continue

        article = _build_article(p, unit_nl_to_2, unit_nl_to_38)
        articles.append(article)

        syr_groep = article["syr_groep"]
        family_nl = article["familyNL"]
        if not syr_groep and family_nl:
            unmapped[family_nl] = unmapped.get(family_nl, 0) + 1

        if syr_groep and article["syr_subgroep"]:
            stats["volledig"] += 1
        elif syr_groep:
            stats["enkel_groep"] += 1
        else:
            stats["geen"] += 1

    if skipped:
        log.warning(f"{skipped} producten zonder generalInformation overgeslagen")

    # Write the output
    output = {
        "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "source_file":  os.path.basename(os.path.realpath(INPUT_PATH)),
        "total":        total,
        "articles":     articles,
    }

    # Write to a temp file first so readers never see a half-written file.
    tmp = OUTPUT_PATH + ".tmp"
    with open(tmp, "w", encoding="utf-8") as f:
        json.dump(output, f, ensure_ascii=False, separators=(",", ":"))
    os.replace(tmp, OUTPUT_PATH)

    gz_tmp = OUTPUT_PATH + ".gz.tmp"
    with open(OUTPUT_PATH, "rb") as fi, gzip.open(gz_tmp, "wb") as fo:
        shutil.copyfileobj(fi, fo)
    os.replace(gz_tmp, OUTPUT_PATH + ".gz")

    elapsed = time.time() - t0
    log.info(f"Klaar in {elapsed:.1f}s | {stats['volledig']} volledig / {stats['enkel_groep']} enkel groep / {stats['geen']} geen")
    # Most frequent unmapped families first.
    for fam, cnt in sorted(unmapped.items(), key=lambda x: -x[1]):
        log.warning(f"  {cnt:3d}x niet gemapt: {fam}")

    print(f"OK: {total} artikelen → {OUTPUT_PATH} ({elapsed:.1f}s)")
    print(f"    {stats['volledig']} volledig / {stats['enkel_groep']} enkel groep / {stats['geen']} geen")

# Run the conversion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    convert()
