/* ═══════════════════════════════════════════════════════════════════
   ScannerData.jsx — Scanner data layer (Session 9)
   ═══════════════════════════════════════════════════════════════════

   Provides:
   • useScannerData()    — React hook with state + scan()/reset() actions
   • scanSite(url)       — low-level API call to Railway /brain/scan
   • normalizeScanData() — converts Railway response → Scanner.jsx's
                           expected `data` shape (fills gaps gracefully)
   • enrichBrandData()   — SESSION 9: client-side colors/fonts extraction
                           via Railway /brain/scan/test stylesheet fetch
   • EMPTY_SCAN_DATA     — default fallback data for pre-scan render

   Session 7 scope:
     Real data:  Overview, Sitemap, Tech, Files & Docs, Recs
     Stubbed:    Brand Kit, Contact, Images, Maps, Icons, SEO, ADA

   Session 8 scope:
     Real data:  Contact tab (emails/phones/socials/address from Railway)

   Session 9 scope:
     Real data:  Brand Kit Colors/Typography extracted client-side from
                 fetched stylesheets (matches scanner.html v5.58 logic).
                 data.colors[] + data.fonts[] populated from real CSS.

   Session A scope (this build):
     Real data:  Logo URL extraction (img filename match + og:image +
                 apple-touch-icon + favicon fallback chain).
                 data.logoUrl, data.faviconUrl, data.ogImage.
                 Dynamic primary email/phone labels from contact data.
                 data.primaryEmailLabel, data.primaryPhoneLabel.
     Pending:    DNS records (TechTab still stubbed — next session).

   Error handling:
   • PLAN_LIMIT_SCANS  → returns error.code + error.upsell (UI shows upgrade)
   • PAYMENT_FAILED    → returns error.code + error.upgrade_url
   • SCAN_RATE_LIMITED → returns error.code + retry hint
   • Network / timeout → returns error.code='NETWORK'
   ═══════════════════════════════════════════════════════════════════ */

(function(){
  'use strict';

  const { useState, useCallback } = React;

  /* ── CONFIG ──────────────────────────────────────────────────── */
  /* Base URL of the Railway scanner API. A truthy
     window.WPSB_CONFIG.RAILWAY_URL overrides the production default. */
  const RAILWAY_URL = window.WPSB_CONFIG?.RAILWAY_URL
    || 'https://wpsitebeam-railway-api-production.up.railway.app';

  /* ── API CLIENT ──────────────────────────────────────────────── */
  /* Calls Railway /brain/scan with Supabase auth token (if signed in).
     Returns {success,data} or {success:false,error:{...}}. */
  /* Send one scan request to Railway `/brain/scan`.

     @param {string} rawUrl         Site URL as typed by the user; `https://` is
                                    prefixed when no http(s) scheme is present.
     @param {object} [scanOptions]  Optional { modes, opts, adaOpts } toggles,
                                    forwarded to the server as `scan_options`.
     @returns {Promise<object>}     { success:true, data } where `data` is the
                                    parsed response body, or
                                    { success:false, error:{ code, message, … } }.

     Expected failures are reported through the return value, not by throwing.
     Codes produced here: INVALID_URL, NETWORK, SCAN_FAILED, and — for non-2xx
     responses — the server's own `code` (or `HTTP_<status>` when it sent none). */
  async function scanSite(rawUrl, scanOptions) {
    // Normalize URL — prefix https:// if missing. Anything that is not a
    // string (null, number, …) is treated like an empty field instead of
    // blowing up on `.trim()`.
    let siteUrl = typeof rawUrl === 'string' ? rawUrl.trim() : '';
    if (!siteUrl) return { success:false, error:{ code:'INVALID_URL', message:'Enter a URL first.' } };
    if (!/^https?:\/\//i.test(siteUrl)) siteUrl = 'https://' + siteUrl;

    try { new URL(siteUrl); } catch(e) {
      return { success:false, error:{ code:'INVALID_URL', message:'That does not look like a valid URL.' } };
    }

    // Get Supabase session for auth
    let token = null;
    try {
      if (window.supabase_client?.auth?.getSession) {
        const { data } = await window.supabase_client.auth.getSession();
        token = data?.session?.access_token || null;
      }
    } catch(e) { /* no session — allow anonymous scan */ }

    const headers = { 'Content-Type': 'application/json' };
    if (token) headers['Authorization'] = 'Bearer ' + token;

    /* v1.3.8 patch h3: forward scan_options to Railway. The server can
       use these to scope which optional work it does (e.g. skip image
       sitemap walking when opts.icons === false, save bandwidth). For
       now the server ignores most of these — the toggles primarily
       shape client-side enrichment behaviour. Sending them anyway so
       a future server upgrade doesn't require a frontend release. */
    const requestBody = { site_url: siteUrl };
    if (scanOptions && typeof scanOptions === 'object') {
      requestBody.scan_options = {
        modes: scanOptions.modes || {},
        opts: scanOptions.opts || {},
        adaOpts: scanOptions.adaOpts || {},
      };
    }

    let res;
    let body = {};
    try {
      res = await fetch(RAILWAY_URL + '/brain/scan', {
        method: 'POST',
        mode: 'cors',
        headers,
        body: JSON.stringify(requestBody),
      });
      let parsed = null;
      try {
        parsed = await res.json();
      } catch (parseErr) {
        /* Non-JSON body (HTML error page, empty response): keep `{}` so the
           status-based handling below still produces a useful error. */
      }
      /* `null`, arrays, strings and numbers are all valid JSON but are not a
         usable response envelope; reading `.code` / `.success` off `null`
         would throw outside this try block. */
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        body = parsed;
      }
    } catch(networkErr) {
      return {
        success: false,
        error: {
          code: 'NETWORK',
          message: 'Could not reach the scanner service. Check your connection and try again.',
          raw: networkErr?.message,
        },
      };
    }

    if (!res.ok) {
      return {
        success: false,
        error: {
          code: body.code || 'HTTP_' + res.status,
          message: body.error || body.message || ('Server returned HTTP ' + res.status),
          upsell: body.upsell || null,
          upgrade_url: body.upgrade_url || null,
          limit: body.limit,
          used: body.used,
          plan: body.plan,
        },
      };
    }

    if (!body.success) {
      /* The non-2xx branch above treats `body.error` as a message string;
         honour it here too so a 200 + success:false reply keeps its reason. */
      const reason = body.message || (typeof body.error === 'string' ? body.error : '');
      return {
        success: false,
        error: { code: 'SCAN_FAILED', message: reason || 'Scan failed.' },
      };
    }

    return { success: true, data: body };
  }

  /* ── SESSION 9: BRAND EXTRACTION (client-side, runs AFTER /brain/scan) ───
     Matches scanner.html v5.58 extractColors + extractFonts + fetchStylesheets
     logic. Railway /brain/scan returns platform/contact/files/tech/etc. but
     NOT colors or fonts — those are browser-side extractions from fetched CSS.
     This enrichment runs as a post-scan pass, fetching stylesheets via
     /brain/scan/test (which acts as a CORS-safe proxy). */

  /* Cache keyed by absolute stylesheet URL → CSS text returned by the
     /brain/scan/test proxy. Populated and consulted by fetchStylesheetsCss()
     so a stylesheet that was fetched successfully is not requested again. */
  const _stylesheetCache = {};

  /* Collect the stylesheets linked from a page and return their CSS text.

     @param {string} body         Page HTML to scan for <link> tags.
     @param {string} origin       Site origin used to absolutise relative hrefs.
     @param {number} [maxSheets]  Upper bound on stylesheets considered (default 8).
     @returns {Promise<string>}   CSS of every sheet that could be loaded, joined
                                  by blank lines ('' when none could be loaded).

     Each sheet is requested one at a time through the Railway
     /brain/scan/test proxy; successful results are remembered in
     _stylesheetCache. Failed or empty responses are skipped silently. */
  async function fetchStylesheetsCss(body, origin, maxSheets) {
    const limit = maxSheets || 8;

    /* Run a global regex over the HTML and return capture group 1 of every hit. */
    const hrefsMatching = (re) => {
      const found = [];
      for (let hit = re.exec(body); hit !== null; hit = re.exec(body)) {
        found.push(hit[1]);
      }
      return found;
    };

    // <link rel="stylesheet" href="..."> in either attribute order
    const hrefs = [
      ...hrefsMatching(/<link[^>]+rel=["']stylesheet["'][^>]+href=["']([^"']+)["']/gi),
      ...hrefsMatching(/<link[^>]+href=["']([^"']+)["'][^>]+rel=["']stylesheet["']/gi),
    ];
    // Any <link href="*.css"> not already collected
    for (const href of hrefsMatching(/<link[^>]+href=["']([^"']+\.css[^"']*?)["']/gi)) {
      if (!hrefs.includes(href)) hrefs.push(href);
    }

    /* Protocol-relative → https, root-relative → origin, already absolute →
       untouched, anything else → treated as relative to the site root. */
    const toAbsolute = (href) => {
      if (href.startsWith('//')) return 'https:' + href;
      if (href.startsWith('/')) return origin + href;
      if (/^https?:/i.test(href)) return href;
      return origin + '/' + href.replace(/^\.?\//, '');
    };

    const trackerRe = /googletagmanager|doubleclick|\/analytics|facebook\.net\//i;
    const visited = new Set();
    const candidates = [];
    for (const href of hrefs) {
      const absolute = toAbsolute(href);
      if (visited.has(absolute)) continue;
      visited.add(absolute);
      if (trackerRe.test(absolute)) continue;   // analytics / ad hosts are noise
      candidates.push(absolute);
    }

    const sheets = [];
    for (const sheetUrl of candidates.slice(0, limit)) {
      if (_stylesheetCache[sheetUrl]) {
        sheets.push(_stylesheetCache[sheetUrl]);
        continue;
      }
      try {
        const resp = await fetch(RAILWAY_URL + '/brain/scan/test?url=' + encodeURIComponent(sheetUrl));
        if (!resp.ok) continue;
        const payload = await resp.json();
        const css = payload.body_preview;
        if (css && typeof css === 'string' && css.length > 10) {
          _stylesheetCache[sheetUrl] = css;
          sheets.push(css);
        }
      } catch (err) { /* skip failed */ }
    }
    return sheets.join('\n\n');
  }

  /* Extract brand colors with selective scope.

     v1.3.8 patch h7: complete rewrite. Previous version did a broad hex
     scan over the entire CSS bundle (every external stylesheet, every
     theme/plugin), which dragged in dozens of random colors from
     bundled libraries (jQuery UI defaults, plugin widget skins, Elementor
     icon-library colors, etc.). User correctly observed that the result
     wasn't the site's actual brand palette.

     New strategy: only look at sources that the site OWNER controls:

       Pass 1: Elementor / theme custom-property brand tokens
               (--e-global-color-*, --primary, --wp--preset--color--*).
               These are explicitly designer-chosen and tagged with role
               (primary/secondary/accent/text). Highest confidence.

       Pass 2: HOMEPAGE inline <style> blocks. These hold the
               theme-customizer / Elementor kit / customizer overrides
               that style this specific site — the actual brand
               palette. External stylesheets are NOT scanned (that was
               the noise source).

       Pass 3: HOMEPAGE inline `style="color: ..."` and `style="background:..."`
               attributes on real content elements. Catches custom
               per-element brand applications that don't go through
               theme tokens.

     Filter (applied to Pass 2 + 3):
       - Drop neutral mid-greys (R/G/B within 8 of each other, brightest
         channel between 33 and 239) — typically theme body-text defaults
         the user doesn't consider "brand". Near-blacks and near-whites
         are kept.
       - Drop fully transparent / 00000000.
       - Drop colors that appear in Elementor's icon-library or
         third-party widget CSS markers.

     For CD specifically, this should yield ~7 colors matching the
     designer's stated palette: black, white, #59228c (purple),
     #faa652 (orange), and a small set of grays/accents. */
  /* @param {string} body             Homepage HTML, including its inline
                                      <style> blocks and style="" attributes.
     @param {string} [stylesheetCss]  Accepted for signature compatibility;
                                      external stylesheet CSS is deliberately
                                      NOT scanned.
     @returns {Array<{hex:string, role:(string|undefined), name:(string|undefined)}>}
              De-duplicated colors in discovery order, at most 25. `hex` is
              always upper-case `#RRGGBB`. `role` / `name` are set only for
              colors that came from a recognised custom property (Pass 1).

     Fully transparent values (`#RRGGBB00`, `#RGB0`, `rgba(…, 0)`) are never
     reported: they paint nothing, and stripping the alpha would otherwise
     surface them as an opaque color the site does not actually show. */
  function extractColorsFromCss(body, stylesheetCss) {
    if (typeof body !== 'string' || !body) return [];

    const out = [];          /* preserves insertion order for de-dup */
    const seen = {};         /* hex → index in out[] (for upgrading role later) */

    const toHex = n => n.toString(16).toUpperCase().padStart(2, '0');

    /* Normalise a single CSS color value to '#RRGGBB', or null when the value
       is not a plain hex / rgb() / rgba() color or is fully transparent. */
    function colorToHex(value) {
      const v = value.trim().replace(/^["']|["']$/g, '');
      const hexM = v.match(/^#([0-9a-fA-F]{3,8})$/);
      if (hexM) {
        let h = hexM[1].toUpperCase();
        /* #RGB and #RGBA shorthands: double every digit. */
        if (h.length === 3 || h.length === 4) h = h.replace(/./g, c => c + c);
        if (h.length === 8) {
          if (h.slice(6) === '00') return null;   /* alpha 0 → invisible */
          h = h.slice(0, 6);                      /* drop alpha */
        }
        return h.length === 6 ? '#' + h : null;   /* 5 / 7 digits are invalid */
      }
      const rgbM = v.match(/^rgba?\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*(?:,\s*([\d.]+))?/i);
      if (rgbM) {
        const r = +rgbM[1], g = +rgbM[2], b = +rgbM[3];
        if (r > 255 || g > 255 || b > 255) return null;
        if (rgbM[4] !== undefined && parseFloat(rgbM[4]) === 0) return null;  /* alpha 0 */
        return '#' + toHex(r) + toHex(g) + toHex(b);
      }
      return null;
    }

    /* Record a color once; a later sighting may only fill in a missing
       role / name, never overwrite one. */
    function add(hex, role, name) {
      if (!hex) return;
      if (seen[hex] !== undefined) {
        const existing = out[seen[hex]];
        if (role && !existing.role)  existing.role = role;
        if (name && !existing.name)  existing.name = name;
        return;
      }
      if (out.length >= 25) return;  /* hard cap — brand palettes are small */
      seen[hex] = out.length;
      out.push({ hex, role, name });
    }

    /* Reject obvious noise: muddy mid-greys that aren't intentional
       brand choices (theme-default body text fallback colors). Only
       applies to Pass 2/3 results — explicit brand tokens (Pass 1)
       always pass through. */
    function isMuddyGrey(hex) {
      const r = parseInt(hex.slice(1, 3), 16);
      const g = parseInt(hex.slice(3, 5), 16);
      const b = parseInt(hex.slice(5, 7), 16);
      const min = Math.min(r, g, b), max = Math.max(r, g, b);
      const isNeutral = (max - min) < 8;  /* ~grey if RGB channels close */
      if (!isNeutral) return false;
      /* Allow pure black, pure white, and very-light/very-dark greys */
      if (max <= 32) return false;        /* deep blacks */
      if (max >= 240) return false;       /* near-whites */
      /* Reject the muddy middle */
      return true;
    }

    /* ── Pass 1: Elementor / theme brand-color custom properties ──
       Body INCLUDES the homepage's inline <style> blocks; that's
       where Elementor renders --e-global-color-* tokens. */
    const ROLE_TOKENS = [
      { re: /^(?:e-global-color-primary|primary(?:-color)?|color-primary|brand(?:-color)?|wp--preset--color--primary)$/, role: 'primary', name: 'Primary' },
      { re: /^(?:e-global-color-secondary|secondary(?:-color)?|color-secondary|wp--preset--color--secondary)$/, role: 'secondary', name: 'Secondary' },
      { re: /^(?:e-global-color-accent|accent(?:-color)?|color-accent)$/, role: 'accent', name: 'Accent' },
      { re: /^(?:e-global-color-text|text(?:-color)?|color-text|body-text)$/, role: 'text', name: 'Text' },
    ];
    const customPropRe = /--([\w-]+)\s*:\s*([^;}]+)[;}]/g;
    let m;
    while ((m = customPropRe.exec(body)) !== null) {
      const propName = m[1].toLowerCase();
      const hex = colorToHex(m[2]);
      if (!hex) continue;
      const known = ROLE_TOKENS.find(t => t.re.test(propName));
      if (known) {
        add(hex, known.role, known.name);
      } else if (/^e-global-color-/.test(propName)) {
        add(hex, 'brand', m[1]);   /* custom Elementor global: keep its raw token name */
      } else if (/^wp--preset--color--/.test(propName)) {
        add(hex, 'brand', propName.replace('wp--preset--color--', ''));
      }
      /* any other custom property is not a brand token → ignored */
    }

    /* ── Pass 2: scan HOMEPAGE inline <style> blocks only ──
       Skip external stylesheetCss entirely — that's the noise source.
       Inline <style> contains the customizer + Elementor kit declarations. */
    const inlineStyleRe = /<style\b[^>]*>([\s\S]*?)<\/style>/gi;
    let sm;
    while ((sm = inlineStyleRe.exec(body)) !== null) {
      const css = sm[1];
      /* Only real visual-color properties, and only at a declaration
         boundary so custom properties such as `--x-color` are not re-read.
         The value class excludes `!`, so `!important` never reaches
         colorToHex. */
      const propValRe = /(?:^|[\{;\s])\s*(color|background|background-color|border-color|border-top-color|border-bottom-color|border-left-color|border-right-color|outline-color|fill|stroke)\s*:\s*([^;}!]+)/gi;
      let pm;
      while ((pm = propValRe.exec(css)) !== null) {
        const hex = colorToHex(pm[2]);
        if (!hex) continue;
        if (isMuddyGrey(hex)) continue;
        add(hex);
      }
    }

    /* ── Pass 3: inline style="..." attributes on real content ──
       Captures per-element brand applications. Only color/background-related
       props; skips layout-only inline styles. The closing quote must match
       the opening one, so values containing the other quote character
       (e.g. font-family:'Open Sans') no longer truncate the attribute. */
    const inlineAttrRe = /\bstyle=(["'])([\s\S]*?)\1/g;
    let am;
    let scanned = 0;
    while ((am = inlineAttrRe.exec(body)) !== null && scanned < 200) {
      scanned++;
      const block = am[2];
      const propValRe = /(color|background|background-color|border-color)\s*:\s*([^;]+)/gi;
      let pm;
      while ((pm = propValRe.exec(block)) !== null) {
        /* Same `!important` tolerance as Pass 2. */
        const value = pm[2].replace(/\s*!important\s*$/i, '');
        const hex = colorToHex(value);
        if (!hex) continue;
        if (isMuddyGrey(hex)) continue;
        add(hex);
      }
    }

    return out;
  }

  /* ── Server-managed icon library patterns ──────────────────────────
     Fetched once per session from /scanner/icon-libraries. The server
     hosts the canonical list so updates roll out without a frontend
     deploy. Falls back to the baked-in patterns below if fetch fails
     (offline, server down, CORS issue, etc.).

     Architecture choice: server returns regex strings (not RegExp objects,
     since JSON can't carry those). Frontend converts to RegExp at first
     use with case-insensitive flag. Schema version returned for
     observability/debugging. */
  /* Module-level memo for the icon-library catalogue (all start out null):
       _iconLibraryPatternsCache     — compiled [{ id, regex, label }] list, or the
                                       baked-in fallback after a failed fetch.
       _iconLibraryFingerprintsCache — compiled fingerprint objects ([] after a
                                       failed fetch).
       _iconLibraryFetchPromise      — the promise created by getIconLibraryPatterns(),
                                       handed to callers that arrive before the
                                       patterns cache has been filled. */
  let _iconLibraryPatternsCache = null;
  let _iconLibraryFingerprintsCache = null;
  let _iconLibraryFetchPromise = null;

  /* Resolve the icon-library pattern list, fetching it at most once.

     @returns {Promise<Array<{id, regex:RegExp, label}>>}
              Server patterns compiled case-insensitively, or
              BAKED_IN_ICON_PATTERNS when the fetch / validation fails.
              Never rejects.

     Side effects: fills _iconLibraryPatternsCache and, when the response
     carries a `fingerprints` array, _iconLibraryFingerprintsCache.

     Entries are validated before compiling: `new RegExp(undefined)` and
     `new RegExp('')` both yield a pattern that matches EVERY string, so a
     single malformed server entry would otherwise classify every font as an
     icon library (or every page as using a given fingerprint). An empty
     DOM marker has the same match-everything effect with indexOf(). */
  async function getIconLibraryPatterns() {
    if (_iconLibraryPatternsCache) return _iconLibraryPatternsCache;
    if (_iconLibraryFetchPromise) return _iconLibraryFetchPromise;

    const isNonEmptyString = v => typeof v === 'string' && v.length > 0;
    const listOf = v => (Array.isArray(v) ? v : []);

    _iconLibraryFetchPromise = (async () => {
      try {
        const res = await fetch(RAILWAY_URL + '/scanner/icon-libraries', {
          /* 1h cache hint for the browser; server also sets Cache-Control */
          cache: 'default',
        });
        if (!res.ok) throw new Error('HTTP ' + res.status);
        const data = await res.json();
        if (!data || !Array.isArray(data.patterns) || data.patterns.length === 0) {
          throw new Error('Empty patterns list');
        }
        const compiled = data.patterns
          .map(p => {
            if (!p || !isNonEmptyString(p.regex)) return null;   /* malformed entry */
            try { return { id: p.id, regex: new RegExp(p.regex, 'i'), label: p.label }; }
            catch (e) { return null; }                           /* invalid regex syntax */
          })
          .filter(Boolean);
        if (compiled.length === 0) throw new Error('No patterns compiled successfully');
        _iconLibraryPatternsCache = compiled;

        /* Also compile fingerprints — URL-pattern + DOM-marker based
           detection that catches platform-specific icon delivery (Webflow
           SVG sprites, Shopify Liquid icons, Wix CDN, Elementor 3.x vs
           4.0 atomic, etc.). Cached alongside patterns. */
        if (Array.isArray(data.fingerprints)) {
          _iconLibraryFingerprintsCache = data.fingerprints
            .map(fp => {
              if (!fp) return null;
              try {
                return {
                  id: fp.id,
                  name: fp.name,
                  category: fp.category,
                  platforms: fp.platforms || ['*'],
                  /* A syntactically invalid pattern still discards the whole
                     fingerprint (caught below); blank / non-string ones are
                     simply skipped. */
                  urlPatterns: listOf(fp.url_patterns)
                    .filter(isNonEmptyString)
                    .map(s => new RegExp(s, 'i')),
                  domMarkers: listOf(fp.dom_markers).filter(isNonEmptyString),
                  notes: fp.notes,
                };
              } catch (e) { return null; }
            })
            .filter(Boolean);
        }
        return compiled;
      } catch (e) {
        console.warn('[WPSB Scanner] Icon library patterns fetch failed, using baked-in fallback:', e.message);
        _iconLibraryPatternsCache = BAKED_IN_ICON_PATTERNS;
        _iconLibraryFingerprintsCache = [];
        return _iconLibraryPatternsCache;
      }
    })();

    return _iconLibraryFetchPromise;
  }

  /* Returns fingerprints (compiled with RegExp). Call AFTER
     getIconLibraryPatterns() so the cache is populated. */
  /* Synchronous accessor for the compiled fingerprint list. Yields a fresh
     empty array while nothing has been cached yet. */
  function getIconLibraryFingerprints() {
    const cached = _iconLibraryFingerprintsCache;
    if (cached) return cached;
    return [];
  }

  /* Scan homepage body + stylesheet CSS for fingerprint matches.
     Returns array of detected libraries: [{id, name, category, notes,
     matchedVia: 'url'|'dom', evidence: '<matched string snippet>'}].

     Matching strategy:
       1. URL patterns — search ALL <link href=>, <script src=>, <img src=>,
          and any string in CSS that looks like a URL. First match wins
          per fingerprint.
       2. DOM markers — substring match against the body HTML. Looser
          than URL but covers inline sprites + custom elements.

     Designed to complement (not replace) extractFontsFromCss — which
     handles the @font-face naming case. Some libraries are detected
     BOTH ways (Font Awesome via @font-face AND via kit URL), in which
     case the fingerprint result takes priority for the display label
     since it's more specific (e.g. "Font Awesome Kit" vs "Font Awesome"). */
  /* @param {string} body             Homepage HTML.
     @param {string} [stylesheetCss]  Extra CSS text appended to the search text.
     @returns {Array<{id, name, category, notes, matchedVia, evidence}>}
              One entry per fingerprint id that matched, in catalogue order.
              `matchedVia` is 'url' or 'dom'; `evidence` is the matched text
              (URL hits are capped at 80 chars, DOM hits show at least 60
              chars starting at the marker).
     Returns [] when no fingerprints are cached or `body` is empty. */
  function detectIconFingerprints(body, stylesheetCss) {
    const catalogue = getIconLibraryFingerprints();
    if (!catalogue.length || !body) return [];

    const cssPart = stylesheetCss ? '\n' + stylesheetCss : '';
    const text = body + cssPart;

    /* URL patterns: the first regex that hits wins. */
    const findUrlHit = (fp) => {
      for (const pattern of fp.urlPatterns) {
        const hit = text.match(pattern);
        if (hit) return { matchedVia: 'url', evidence: hit[0].slice(0, 80) };
      }
      return null;
    };

    /* DOM markers: plain substring search, first marker present wins. */
    const findDomHit = (fp) => {
      if (!fp.domMarkers.length) return null;
      for (const marker of fp.domMarkers) {
        const at = text.indexOf(marker);
        if (at >= 0) {
          const span = Math.max(60, marker.length + 20);
          return { matchedVia: 'dom', evidence: text.slice(at, at + span) };
        }
      }
      return null;
    };

    const results = [];
    const reported = {};
    for (const fp of catalogue) {
      /* Markers are only consulted when no URL pattern matched. */
      const hit = findUrlHit(fp) || findDomHit(fp);
      if (!hit || reported[fp.id]) continue;
      reported[fp.id] = 1;
      results.push({
        id: fp.id,
        name: fp.name,
        category: fp.category,
        notes: fp.notes,
        matchedVia: hit.matchedVia,
        evidence: hit.evidence,
      });
    }
    return results;
  }

  /* Baked-in fallback — used when /scanner/icon-libraries can't be reached.
     Smaller subset covering the most common libraries; the server-hosted
     list is the canonical, comprehensive version. */
  /* Each entry is { id, regex, label }; a font-family name is treated as an
     icon library when ANY regex matches it. Specific libraries come first,
     the two generic "*icons" catch-alls last (order does not affect the
     yes/no classification, only readability).

     Glyphicons, Ionicons, Phosphor and Feather are listed explicitly: their
     usual family names ("Glyphicons Halflings", "Ionicons", "Phosphor",
     "feather") contain no standalone word "icon(s)", so the generic
     patterns cannot catch them. Feather is anchored at both ends so ordinary
     fonts that merely start with "feather" are not misclassified. */
  const BAKED_IN_ICON_PATTERNS = [
    { id:'fontawesome',     regex:/^font\s*awesome/i,               label:'Font Awesome' },
    { id:'fa_short',        regex:/^fa[-\s]/i,                      label:'Font Awesome (short)' },
    { id:'material_icons',  regex:/^material[-\s]?icons?/i,         label:'Material Icons' },
    { id:'dashicons',       regex:/^dashicons?/i,                   label:'Dashicons' },
    { id:'lucide',          regex:/^lucide/i,                       label:'Lucide' },
    { id:'heroicons',       regex:/^heroicons?/i,                   label:'Heroicons' },
    { id:'tabler',          regex:/^tabler/i,                       label:'Tabler Icons' },
    { id:'icomoon',         regex:/icomoon/i,                       label:'IcoMoon' },
    { id:'hugeicons',       regex:/^huge[-\s]?icons?/i,             label:'Hugeicons' },
    { id:'flaticon',        regex:/^flat[-\s]?icons?/i,             label:'Flaticon' },
    { id:'streamline',      regex:/^streamline/i,                   label:'Streamline' },
    { id:'fontello',        regex:/^fontello/i,                     label:'Fontello' },
    { id:'glyphicons',      regex:/^glyphicons?/i,                  label:'Glyphicons' },
    { id:'ionicons',        regex:/^ionicons?/i,                    label:'Ionicons' },
    { id:'phosphor',        regex:/^phosphor/i,                     label:'Phosphor' },
    { id:'feather',         regex:/^feather(?:[-\s]?icons?)?$/i,    label:'Feather' },
    { id:'ends_in_icons',   regex:/^[\w-]*[-\s]icons?$/i,           label:'Generic *-icons' },
    { id:'contains_icons',  regex:/\bicons?\b/i,                    label:'Generic contains icons' },
  ];

  /* Extract font families from @font-face + font-family CSS declarations.

     Returns { fonts, iconLibraries } — icon libraries are technically
     loaded via @font-face but are NOT brand fonts and should not appear
     in the Typography section. They get their own list, surfaced under
     Brand Kit → Icon Libraries.

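     Detection of icon libraries: the family name is tested against the
     active pattern list — server-fetched patterns when the caller passes
     them, otherwise BAKED_IN_ICON_PATTERNS (Font Awesome, Material Icons,
     Dashicons, Lucide, Heroicons, Tabler, IcoMoon, … plus generic
     "*-icons" / "icons" matches). Any regex hit classifies the family
     as an icon library.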
  */
  /* Extracts font families from CSS — separates brand fonts from icon
     libraries via the patterns argument (defaults to BAKED_IN_ICON_PATTERNS,
     but the caller can pass server-fetched patterns from /scanner/icon-libraries
     for fresher coverage). */
  /* @param {string} body             Homepage HTML.
     @param {string} [stylesheetCss]  Fetched stylesheet CSS, scanned together with `body`.
     @param {Array<{id, regex:RegExp, label}>} [iconPatterns]
                                      Patterns that mark a family as an icon
                                      library; BAKED_IN_ICON_PATTERNS when
                                      omitted or empty.
     @returns {{fonts:Array, iconLibraries:Array}}
              Entries are { name, source, weights, uses, isSystem }. `fonts`
              is ordered non-system first, then by `uses` descending;
              `iconLibraries` by `uses` descending. `weights` is always []
              (reserved, not populated here).

     Values that are not family names never become entries: CSS functions
     such as var(--token) are removed from a font-family list before it is
     split, and a trailing `!important` is stripped. A malformed
     percent-escape in a Google Fonts URL is kept verbatim instead of
     aborting the whole extraction. */
  function extractFontsFromCss(body, stylesheetCss, iconPatterns) {
    const combined = body + (stylesheetCss ? '\n<style>' + stylesheetCss + '</style>' : '');
    /* Maps keep insertion order and cannot collide with inherited object
       keys (a family literally named "constructor" would otherwise be
       mistaken for an existing entry). */
    const fonts = new Map();
    const iconLibraries = new Map();
    const SKIP = ['inherit', 'initial', 'unset', 'revert', 'none', 'normal', 'auto', 'currentcolor', 'var', 'transparent'];
    const SYSTEM = ['arial','helvetica','times new roman','times','georgia','verdana',
      'trebuchet ms','courier new','courier','impact','tahoma','palatino','garamond',
      'system-ui','-apple-system','blinkmacsystemfont','segoe ui','roboto',
      'sans-serif','serif','monospace','cursive','fantasy','ui-sans-serif','ui-serif','ui-monospace','emoji'];

    /* Use server-provided patterns when available, baked-in fallback otherwise.
       Pattern object shape: { id, regex, label } where regex is a RegExp instance. */
    const patterns = (Array.isArray(iconPatterns) && iconPatterns.length > 0)
      ? iconPatterns
      : BAKED_IN_ICON_PATTERNS;
    function isIconLibrary(name) {
      const low = name.toLowerCase();
      return patterns.some(p => p.regex.test(low));
    }

    /* Register one sighting of a family name (quotes trimmed, keywords skipped). */
    function addEntry(name, source) {
      const cleanName = name.trim().replace(/^["'`]|["'`]$/g, '').trim();
      if (!cleanName || cleanName.length > 80) return;
      const low = cleanName.toLowerCase();
      if (SKIP.indexOf(low) >= 0) return;

      const target = isIconLibrary(cleanName) ? iconLibraries : fonts;
      let entry = target.get(cleanName);
      if (!entry) {
        entry = {
          name: cleanName,
          source: source,
          weights: [],
          uses: 0,
          isSystem: SYSTEM.indexOf(low) >= 0,
        };
        target.set(cleanName, entry);
      }
      entry.uses += 1;
    }

    /* Split the value of a `font-family:` declaration into family names. */
    function addFamilyList(value, source) {
      let list = value.replace(/\s*!important\s*$/i, '');
      /* Remove function calls — var(--x), var(--x, Fallback), env(…) — innermost
         first, repeating until none are left. Each pass shortens the string,
         so the loop terminates. */
      let before;
      do {
        before = list;
        list = list.replace(/[\w-]+\([^()]*\)/g, '');
      } while (list !== before);
      list.split(',').forEach(part => addEntry(part, source));
    }

    /* decodeURIComponent throws URIError on a malformed escape such as "%ZZ". */
    function safeDecode(text) {
      try { return decodeURIComponent(text); } catch (e) { return text; }
    }

    // Pass 1: @font-face { font-family: "Name"; }
    const faceRe = /@font-face\s*{[^}]*font-family\s*:\s*([^;}]+)[;}]/gi;
    let m;
    while ((m = faceRe.exec(combined)) !== null) {
      addFamilyList(m[1], '@font-face');
    }

    // Pass 2: font-family: Name, "Other Name", sans-serif;
    // (also re-reads the @font-face declarations from Pass 1, so those
    //  families are counted in both passes)
    const fontFamilyRe = /font-family\s*:\s*([^;}]+)[;}]/gi;
    while ((m = fontFamilyRe.exec(combined)) !== null) {
      addFamilyList(m[1], 'CSS rule');
    }

    // Pass 3: Google Fonts — both URL generations, one sighting per family:
    //   css?family=A:400,700|B            (v1: families separated by "|")
    //   css2?family=A:wght@400&family=B   (v2: one `family=` param each)
    // Parameters may be separated by "&", "&amp;" or "&#038;" in HTML.
    const googleRe = /fonts\.googleapis\.com\/css2?\?([^"'>\s)]+)/gi;
    while ((m = googleRe.exec(combined)) !== null) {
      const query = m[1].replace(/&(?:amp|#0*38);/gi, '&');
      query.split('&').forEach(param => {
        if (param.slice(0, 7).toLowerCase() !== 'family=') return;
        safeDecode(param.slice(7)).split('|').forEach(familyBlock => {
          const name = familyBlock.split(':')[0].replace(/\+/g, ' ');
          if (name) addEntry(name, 'Google Fonts');
        });
      });
    }

    // Pass 4: Adobe Fonts (Typekit) — use.typekit.net
    if (/use\.typekit\.net/i.test(combined)) {
      // Typekit kit IDs don't reveal font names without API call — surface the kit
      const kitMatch = combined.match(/use\.typekit\.net\/([a-z0-9]+)/i);
      if (kitMatch) addEntry('Adobe Fonts (Typekit)', 'Adobe Fonts');
    }

    // Sort: non-system first (more likely brand fonts), then by use count desc
    const sortFn = (a, b) => {
      if (a.isSystem !== b.isSystem) return a.isSystem ? 1 : -1;
      return b.uses - a.uses;
    };
    return {
      fonts: [...fonts.values()].sort(sortFn),
      iconLibraries: [...iconLibraries.values()].sort((a, b) => b.uses - a.uses),
    };
  }

  /* Extract site logo from HTML body. Tries multiple platform conventions.

     Priority order (highest confidence first):
       1. WordPress custom-logo classes — <img class="custom-logo" ...>
          Theme-installed site logo; reliable on most modern WP themes.
       2. Elementor logo widget output — class="elementor-widget-theme-site-logo"
          or img inside .elementor-widget-image with logo-related src.
       3. Site identity microformat — itemprop="logo" or rel="logo"
       4. Schema.org Organization markup with logo property
       5. <img> tags with "logo" in src or alt (fuzzy match)
       6. <header> / <nav> first <img> (site usually puts logo in header)
       7. og:image fallback
       8. apple-touch-icon
       9. /favicon.ico ultimate fallback

     Same pattern applies to Joomla (.site-logo), Squarespace (.site-title img),
     Wix (.site-logo), Shopify (.header__logo-image).

     All URLs resolved to absolute. */
  function extractLogoUrl(body, origin) {
    if (!body || typeof body !== 'string') return { logo: null, og: null, favicon: null };

    function absolute(u) {
      if (!u) return null;
      u = u.trim();
      /* v1.3.8 patch: JSON-LD URLs contain JSON-escaped chars like `\/`
         (forward slash) — JSON allows but doesn't require this escape.
         Browsers reject `https:\/\/example.com` as an invalid src URL,
         so the image silently fails to load and the BrandKit shows the
         fallback initial. Unescape `\/` and `\\` before any other
         normalization happens. Other JSON escapes (\u0026, \", \n) don't
         legitimately appear in URLs and are intentionally not handled. */
      u = u.replace(/\\\//g, '/').replace(/\\\\/g, '\\');
      if (u.indexOf('//') === 0) return 'https:' + u;
      if (u.charAt(0) === '/') return origin + u;
      if (!/^https?:/i.test(u)) return origin + '/' + u.replace(/^\.?\//, '');
      return u;
    }

    /* Helper: pull src from a single <img> tag string */
    function srcOf(imgTag) {
      const m = imgTag.match(/\bsrc=["']([^"']+)["']/i);
      return m ? m[1] : null;
    }
    function classOf(imgTag) {
      const m = imgTag.match(/\bclass=["']([^"']+)["']/i);
      return m ? m[1] : '';
    }
    function altOf(imgTag) {
      const m = imgTag.match(/\balt=["']([^"']*?)["']/i);
      return m ? m[1] : '';
    }

    let logo = null, og = null, favicon = null;

    /* ── Pass 1: WordPress custom-logo class ─────────────────────────
       Most modern WP themes wrap the site logo in an <img class="custom-logo">.
       This is the strongest signal — a site identity field. */
    const customLogoMatch = body.match(/<img\s+[^>]*\bclass=["'][^"']*\bcustom-logo\b[^"']*["'][^>]*>/i);
    if (customLogoMatch) {
      const src = srcOf(customLogoMatch[0]);
      if (src && !src.startsWith('data:')) logo = absolute(src);
    }

    /* ── Pass 2: Elementor site logo widget ──────────────────────────
       Elementor's theme builder uses .elementor-widget-theme-site-logo
       wrapper with an <img> inside. Match the wrapper, then find the img. */
    if (!logo) {
      const elemSiteLogo = body.match(/<[^>]+class=["'][^"']*\belementor-widget-theme-site-logo\b[^"']*["'][^>]*>([\s\S]*?)<\/[^>]+>/i);
      if (elemSiteLogo) {
        const innerImg = elemSiteLogo[1].match(/<img\s+[^>]+>/i);
        if (innerImg) {
          const src = srcOf(innerImg[0]);
          if (src && !src.startsWith('data:')) logo = absolute(src);
        }
      }
    }

    /* ── Pass 3: itemprop="logo" / rel="logo" microformat ────────────  */
    if (!logo) {
      const itemMatch = body.match(/<[^>]+(?:itemprop|rel)=["']logo["'][^>]+(?:src|href|content)=["']([^"']+)["']/i);
      if (itemMatch) logo = absolute(itemMatch[1]);
    }

    /* ── Pass 4: Schema.org Organization JSON-LD logo ────────────────  */
    if (!logo) {
      const ldMatch = body.match(/<script[^>]+type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/i);
      if (ldMatch) {
        const ldStr = ldMatch[1];
        const logoUrlMatch = ldStr.match(/"logo"\s*:\s*(?:"([^"]+)"|\{[^}]*"url"\s*:\s*"([^"]+)"[^}]*\})/);
        if (logoUrlMatch) logo = absolute(logoUrlMatch[1] || logoUrlMatch[2]);
      }
    }

    /* ── Pass 5: <img> tags with 'logo' in src/alt/class (fuzzy) ────── */
    if (!logo) {
      const imgRe = /<img\s+[^>]*>/gi;
      let m, candidates = [];
      while ((m = imgRe.exec(body)) !== null) {
        const tag = m[0];
        const src = srcOf(tag);
        if (!src || src.startsWith('data:')) continue;
        const alt = altOf(tag);
        const cls = classOf(tag);
        const isLogo = /\blogo\b/i.test(src) || /\blogo\b/i.test(alt) || /\blogo\b/i.test(cls);
        const isNav  = /sitebuilder\/images\/(navbar|spacer)/i.test(src);
        const isIcon = /favicon|cdn-cgi/i.test(src);
        if (isNav || isIcon) continue;
        if (isLogo) candidates.push(src);
      }
      if (candidates.length) logo = absolute(candidates[0]);
    }

    /* ── Pass 6: First img inside <header> or <nav> ──────────────────
       Site logos almost always live in the header. Find the first img
       inside the first header/nav element as a last-ditch heuristic. */
    if (!logo) {
      const headerMatch = body.match(/<(header|nav)\b[^>]*>([\s\S]*?)<\/(header|nav)>/i);
      if (headerMatch) {
        const firstImg = headerMatch[2].match(/<img\s+[^>]+>/i);
        if (firstImg) {
          const src = srcOf(firstImg[0]);
          if (src && !src.startsWith('data:') && !/favicon|cdn-cgi/i.test(src)) {
            logo = absolute(src);
          }
        }
      }
    }

    /* ── og:image (fallback for socially-shared logo or hero shot) ── */
    const ogMatch = body.match(/<meta[^>]+property=["']og:image["'][^>]+content=["']([^"']+)["']/i)
                 || body.match(/<meta[^>]+content=["']([^"']+)["'][^>]+property=["']og:image["']/i);
    if (ogMatch) og = absolute(ogMatch[1]);

    /* ── Favicon — try every known link relation ────────────────── */
    /* apple-touch-icon (best resolution, iOS preferred) */
    const atMatch = body.match(/<link[^>]+rel=["']apple-touch-icon(?:-precomposed)?["'][^>]+href=["']([^"']+)["']/i);
    if (atMatch) favicon = absolute(atMatch[1]);

    /* Standard <link rel="icon"> (with sizes preference for largest) */
    if (!favicon) {
      const iconMatches = body.match(/<link[^>]+rel=["'](?:shortcut )?icon["'][^>]+>/gi) || [];
      let bestSize = 0;
      for (const tag of iconMatches) {
        const href = (tag.match(/\bhref=["']([^"']+)["']/i) || [])[1];
        if (!href) continue;
        const sizesAttr = (tag.match(/\bsizes=["']([^"']+)["']/i) || [])[1] || '';
        const sizeNum = parseInt((sizesAttr.match(/\d+/) || [0])[0], 10);
        if (sizeNum >= bestSize) {
          bestSize = sizeNum;
          favicon = absolute(href);
        }
      }
    }

    /* mask-icon (Safari pinned tab — usually SVG) */
    if (!favicon) {
      const maskMatch = body.match(/<link[^>]+rel=["']mask-icon["'][^>]+href=["']([^"']+)["']/i);
      if (maskMatch) favicon = absolute(maskMatch[1]);
    }

    /* msapplication-TileImage (Windows Start tile) */
    if (!favicon) {
      const tileMatch = body.match(/<meta[^>]+name=["']msapplication-TileImage["'][^>]+content=["']([^"']+)["']/i);
      if (tileMatch) favicon = absolute(tileMatch[1]);
    }

    /* Ultimate fallback — most sites have /favicon.ico even without the link tag */
    if (!favicon) favicon = origin + '/favicon.ico';

    /* v1.3.8 patch h9: don't return og:image or favicon AS the logo.
       Previously this code returned `logo: logo || og || favicon` as a
       "fallback chain" — but that meant when no real logo was found,
       the function returned the og:image (or favicon) URL labeled as
       the logo. Calling code had no way to distinguish a real logo
       from a fallback. The Brand Kit then either showed the wrong
       image (og:image instead of logo) or labeled the favicon as logo.
       Now: logo is returned as null when no real logo is found. The
       Brand Kit's own fallback chain (data.logoUrl → data.ogImage →
       initial char) decides what to show. */
    return { logo, og, favicon };
  }

  /* ── extractBrandAssetsViaDOMParser (v1.3.8 patch h9) ────────────────
     DOMParser-based logo / og:image / favicon extraction.

     Why not regex: regex-based parsing is fragile against edge HTML —
     nested quotes, malformed attributes, encoded characters, broken
     closing tags. The previous regex chain in extractLogoUrl missed CD's
     actual logo for reasons that took 5+ patches to track down.
     DOMParser uses the browser's native HTML parser, and querying via
     CSS selectors is far more readable than regex chains.

     Params:
       homeBody — homepage HTML source (string)
       origin   — base URL used to resolve relative URLs

     Returns:
       null when homeBody is empty, DOMParser is not defined, or parsing
       throws. Otherwise { logo, ogImage, favicon }:
         logo    — absolute URL, or null when no real logo was found
                   (never aliased to og:image / favicon)
         ogImage — absolute URL of the og:image meta, or null
         favicon — absolute URL; defaults to /favicon.ico resolved
                   against origin when no link tag supplies one

     Every <img>-derived logo candidate goes through takeImgSrc(), so the
     data:-URI and 1x1 tracking-pixel filters apply uniformly — including
     to images found inside a container element and to
     <img itemprop="logo">. */
  function extractBrandAssetsViaDOMParser(homeBody, origin) {
    if (!homeBody || typeof DOMParser === 'undefined') return null;
    let doc;
    try {
      doc = new DOMParser().parseFromString(homeBody, 'text/html');
    } catch (e) {
      console.warn('[WPSB Scanner] DOMParser failed:', e.message);
      return null;
    }

    /* Resolve a possibly-relative URL against origin; null if unusable. */
    function abs(url) {
      if (!url) return null;
      try { return new URL(url, origin).toString(); }
      catch { return null; }
    }

    /* True when the element declares itself as a 1x1 image. Missing or
       non-numeric width/height parse to NaN and therefore never match. */
    function isTrackingPixel(img) {
      const w = parseInt(img.getAttribute('width'), 10);
      const h = parseInt(img.getAttribute('height'), 10);
      return w === 1 && h === 1;
    }

    /* Accepts either an <img> or a container holding one. Returns the
       absolute src, or null for missing src, data: URIs and 1x1 pixels.
       The size check runs on the <img> that supplied the src — not on
       the container — so a pixel wrapped in e.g. a.navbar-brand is
       rejected too. */
    function takeImgSrc(el) {
      if (!el) return null;
      const isImg = !!el.tagName && el.tagName.toLowerCase() === 'img';
      const img = isImg ? el : el.querySelector('img[src]');
      if (!img) return null;
      const src = img.getAttribute('src');
      if (!src || src.startsWith('data:')) return null;
      if (isTrackingPixel(img)) return null;
      return abs(src);
    }

    /* ── LOGO: priority chain ──────────────────────────────────────
       1. <img class="custom-logo">                  (WordPress core)
       2. .elementor-widget-theme-site-logo img      (Elementor)
       3. [itemprop="logo"]                          (Microdata)
       4. JSON-LD Organization.logo                  (Schema.org)
       5. .site-logo img / .logo img / #logo img     (theme conventions)
       6. <a class="navbar-brand"> img               (Bootstrap)
       7. First usable img inside any <header>       (last-resort heuristic)
       Skips: data: URIs and 1x1 pixels. */
    let logo = null;

    /* 1 — WordPress core .custom-logo class */
    logo = takeImgSrc(doc.querySelector('img.custom-logo'));

    /* 2 — Elementor site-logo widget */
    if (!logo) logo = takeImgSrc(doc.querySelector('.elementor-widget-theme-site-logo img'));

    /* 3 — Microdata itemprop="logo" (may sit on <img>, <link href>,
           <meta content>, or a wrapper element) */
    if (!logo) {
      const ip = doc.querySelector('[itemprop="logo"]');
      if (ip) {
        if (ip.tagName.toLowerCase() === 'img') logo = takeImgSrc(ip);
        else if (ip.getAttribute('href')) logo = abs(ip.getAttribute('href'));
        else if (ip.getAttribute('content')) logo = abs(ip.getAttribute('content'));
        else logo = takeImgSrc(ip);
      }
    }

    /* 4 — JSON-LD Organization.logo. Walk all ld+json scripts. */
    if (!logo) {
      const ldScripts = doc.querySelectorAll('script[type="application/ld+json"]');
      for (const s of ldScripts) {
        try {
          const data = JSON.parse(s.textContent || '');
          /* Payload may be a bare entity, an array of entities, or a
             wrapper carrying an @graph list. */
          const entities = Array.isArray(data) ? data : (data['@graph'] || [data]);
          for (const ent of entities) {
            if (!ent || typeof ent !== 'object') continue;
            const t = ent['@type'];
            const types = Array.isArray(t) ? t : [t];
            if (types.some(x => /Organization|LocalBusiness/i.test(String(x)))) {
              const lg = ent.logo;
              if (lg) {
                /* logo is either a URL string or an object with .url */
                if (typeof lg === 'string') { logo = abs(lg); break; }
                if (typeof lg === 'object' && lg.url) { logo = abs(lg.url); break; }
              }
            }
          }
          if (logo) break;
        } catch (e) { /* invalid JSON-LD, skip */ }
      }
    }

    /* 5 — theme convention class names */
    if (!logo) {
      for (const sel of ['.site-logo img', '.logo img', '#logo img', '.brand-logo img']) {
        logo = takeImgSrc(doc.querySelector(sel));
        if (logo) break;
      }
    }

    /* 6 — Bootstrap-style .navbar-brand img */
    if (!logo) logo = takeImgSrc(doc.querySelector('.navbar-brand img, a.navbar-brand'));

    /* 7 — first usable img inside any <header> */
    if (!logo) {
      for (const img of doc.querySelectorAll('header img')) {
        logo = takeImgSrc(img);
        if (logo) break;
      }
    }

    /* ── FAVICON: link tags + ultimate /favicon.ico fallback ────── */
    let favicon = null;
    /* apple-touch-icon (iOS preferred — usually 180x180+) */
    const at = doc.querySelector('link[rel*="apple-touch-icon"]');
    if (at && at.getAttribute('href')) favicon = abs(at.getAttribute('href'));
    /* Standard <link rel="icon">; a later tag wins on equal size, so the
       largest declared size — or the last unsized icon — is kept. */
    if (!favicon) {
      const icons = doc.querySelectorAll('link[rel~="icon"]');
      let bestSize = 0;
      for (const el of icons) {
        const href = el.getAttribute('href');
        if (!href) continue;
        const sz = parseInt((el.getAttribute('sizes') || '').match(/\d+/)?.[0] || '0', 10);
        if (sz >= bestSize) {
          bestSize = sz;
          favicon = abs(href);
        }
      }
    }
    /* Final fallback */
    if (!favicon) favicon = abs('/favicon.ico');

    /* ── OG IMAGE — for social preview tile ──────────────────────── */
    let ogImage = null;
    const og = doc.querySelector('meta[property="og:image"], meta[name="og:image"]');
    if (og && og.getAttribute('content')) ogImage = abs(og.getAttribute('content'));

    return { logo, ogImage, favicon };
  }

  /* ── enrichBrandData ─────────────────────────────────────────────────
     Client-side brand enrichment. Fetches the homepage HTML through the
     /brain/scan/test endpoint and stamps derived values onto rawScan
     (mutated in place and also returned):
       colors, fonts, icon_libraries, _clientImages — always overwritten
       logo_url, og_image, favicon_url              — only filled when the
                                                      current value is falsy
     Best-effort: any failure is logged via console.warn and rawScan is
     returned with whatever had been stamped up to that point. */
  async function enrichBrandData(rawScan, siteUrl) {
    if (!rawScan || !siteUrl) return rawScan;

    try {
      const origin = new URL(siteUrl).origin;
      const previewUrl = RAILWAY_URL + '/brain/scan/test?url=' + encodeURIComponent(siteUrl);

      /* v1.3.8 patch h8: 429-tolerant fetch — one retry after a short
         pause. If the response is still not OK, skip the client-side
         work and leave the server-extracted values intact. */
      let previewRes = await fetch(previewUrl);
      if (previewRes.status === 429) {
        await new Promise((resolve) => setTimeout(resolve, 1500));
        previewRes = await fetch(previewUrl);
      }
      if (!previewRes.ok) return rawScan;

      const preview = await previewRes.json().catch(() => ({}));
      const homeBody = preview.body_preview || '';
      if (!homeBody) return rawScan;

      /* ── Colors + typography from the page and its stylesheets ── */
      const sheetCss = await fetchStylesheetsCss(homeBody, origin, 8);
      rawScan.colors = extractColorsFromCss(homeBody, sheetCss);
      /* Icon-library patterns are handed to the font extractor so it can
         recognise icon fonts. */
      const iconPatterns = await getIconLibraryPatterns();
      const typography = extractFontsFromCss(homeBody, sheetCss, iconPatterns);
      rawScan.fonts = typography.fonts;

      /* ── Icon libraries: fingerprint hits first, then font-family
         detections whose name is not already present
         (case-insensitive). Fingerprints are de-duplicated by id. ── */
      const fontFamilyIcons = typography.iconLibraries || [];
      const fingerprintIcons = detectIconFingerprints(homeBody, sheetCss);
      const mergedIcons = [];
      const claimedIds = {};
      for (const hit of fingerprintIcons) {
        if (claimedIds[hit.id]) continue;
        claimedIds[hit.id] = 1;
        const viaUrl = hit.matchedVia === 'url';
        mergedIcons.push({
          name: hit.name,
          source: viaUrl ? 'CDN/URL' : 'DOM marker',
          uses: 1,
          category: hit.category,
          notes: hit.notes,
          evidence: hit.evidence,
          isSystem: false,
        });
      }
      for (const lib of fontFamilyIcons) {
        const wanted = (lib.name || '').toLowerCase();
        const alreadyListed = mergedIcons.some((entry) => entry.name.toLowerCase() === wanted);
        if (alreadyListed) continue;
        mergedIcons.push(lib);
      }
      rawScan.icon_libraries = mergedIcons;

      /* ── Logo / og:image / favicon ──────────────────────────────
         Priority per field: server value already on rawScan →
         DOMParser extraction → regex extraction. */
      const found = { logo: null, og: null, favicon: null };
      try {
        const viaDom = extractBrandAssetsViaDOMParser(homeBody, origin);
        if (viaDom) {
          found.logo = viaDom.logo;
          found.og = viaDom.ogImage;
          found.favicon = viaDom.favicon;
        }
      } catch (e) {
        console.warn('[WPSB Scanner] DOMParser extraction failed:', e.message);
      }
      /* Regex extractor only runs when the DOM pass left a gap. */
      const domPassComplete = found.logo && found.og && found.favicon;
      if (!domPassComplete) {
        const viaRegex = extractLogoUrl(homeBody, origin);
        found.logo = found.logo || viaRegex.logo;
        found.og = found.og || viaRegex.og;
        found.favicon = found.favicon || viaRegex.favicon;
      }

      /* CRITICAL: never overwrite a value that is already set, and never
         stamp a null. */
      const stamps = [
        ['logo_url', found.logo],
        ['og_image', found.og],
        ['favicon_url', found.favicon],
      ];
      for (const [field, value] of stamps) {
        if (!rawScan[field] && value) rawScan[field] = value;
      }

      /* ── Images tab: <img> metadata parsed from the same homepage
         body. A failure here is isolated so it cannot discard the
         values stamped above. ── */
      try {
        rawScan._clientImages = extractImagesFromBody(homeBody, origin);
      } catch (e) {
        console.warn('[WPSB Scanner] Image extraction failed:', e.message);
        rawScan._clientImages = [];
      }
    } catch (e) {
      console.warn('[WPSB Scanner] Brand enrichment failed:', e.message);
    }

    return rawScan;
  }

  /* ── extractImagesFromBody (ported from v1 scanner.html line 643) ──────
     Parses <img> tags from raw HTML and returns structured image metadata.

     Params:
       body   — raw HTML string (anything else yields [])
       origin — site origin used to absolutise relative src values;
                trailing slashes are ignored
     Returns an array (max 200 entries, first-seen wins per resolved src):
       { id, src, alt, type, width, height, filename, mime, ext }
       width / height are null when absent from the tag.

     Filters SiteBuilder UI chrome (navbar-*, spacer) and cdn-cgi paths
     just like v1 did, plus data: URIs.

     Filename and extension are derived from the URL with its query string
     and fragment removed, and the extension only from the filename's last
     dot. URLs without a usable extension (e.g. https://cdn.x.com/photo)
     report ext 'jpg' / mime 'image/jpeg' rather than a path fragment. */
  function extractImagesFromBody(body, origin) {
    if (!body || typeof body !== 'string') return [];

    const MAX_IMAGES = 200;
    const MIME_BY_EXT = {
      jpg: 'image/jpeg', jpeg: 'image/jpeg', png: 'image/png',
      gif: 'image/gif', webp: 'image/webp', svg: 'image/svg+xml',
      ico: 'image/x-icon', avif: 'image/avif', bmp: 'image/bmp',
    };
    const originClean = (origin || '').replace(/\/+$/, '');
    const found = new Map();   /* resolved src → image record */
    const tagRe = /<img[^>]+>/gi;
    let m;

    while (found.size < MAX_IMAGES && (m = tagRe.exec(body)) !== null) {
      const tag = m[0];

      /* Trim BEFORE filtering so padded values such as " data:..." are
         recognised and rejected. */
      let src = ((tag.match(/src=["']([^"']+)["']/i) || [])[1] || '').trim();
      if (!src || src.startsWith('data:')) continue;

      /* Skip UI chrome (same filters as v1) */
      if (/sitebuilder[\/\\]images[\/\\]navbar-/i.test(src)) continue;
      if (/sitebuilder[\/\\]images[\/\\]spacer/i.test(src)) continue;
      if (/\/cdn-cgi\//.test(src)) continue;

      /* Normalize to absolute URL */
      if (src.startsWith('//')) src = 'https:' + src;
      else if (src.startsWith('/')) src = originClean + src;
      else if (!src.startsWith('http')) src = originClean + '/' + src.replace(/^\.\//, '');

      /* First-seen wins — dedupe by resolved src. */
      if (found.has(src)) continue;

      /* Attributes */
      const alt = (tag.match(/alt=["']([^"']*?)["']/i) || [])[1] || '';
      const w = parseInt((tag.match(/width=["']?(\d+)/i) || [])[1] || 0, 10);
      const h = parseInt((tag.match(/height=["']?(\d+)/i) || [])[1] || 0, 10);

      /* Filename / extension from the path only (no ?query, no #hash). */
      const path = src.split(/[?#]/)[0];
      const filename = path.split('/').pop() || 'image';
      const dot = filename.lastIndexOf('.');
      const rawExt = dot > 0 ? filename.slice(dot + 1).toLowerCase() : '';
      const ext = /^[a-z0-9]{1,5}$/.test(rawExt) ? rawExt : 'jpg';
      const mime = MIME_BY_EXT[ext] || 'image/jpeg';

      /* Coarse type guess from keywords anywhere in the URL. */
      const sl = src.toLowerCase();
      const type = sl.includes('logo') ? 'logo'
                 : sl.includes('icon') ? 'icon'
                 : (sl.includes('hero') || sl.includes('banner')) ? 'hero'
                 : sl.includes('thumb') ? 'thumbnail'
                 : 'image';

      found.set(src, {
        id: `img-${found.size}`,
        src, alt, type,
        width: w || null,
        height: h || null,
        filename, mime, ext,
      });
    }
    return [...found.values()];
  }

  /* ── extractMetaFromBody (ported from v1 scanner.html line 813) ────────
     Parses <title>, <meta>, <link rel=canonical>, og:*, twitter:*, etc.

     Params:
       body — raw HTML string (anything else yields {})
     Returns a flat object containing only the keys that were found:
       title, description, og_title, og_description, og_image,
       og_site_name, canonical, generator, keywords, author, robots,
       lang, twitter_title, twitter_desc, twitter_image
     All values are trimmed; title is capped at 200 chars and description
     at 500 (over-long values are truncated, not dropped). HTML entities
     are not decoded.

     Matching notes:
       • Attribute order is irrelevant — both
           <meta name="x" content="y"> and <meta content="y" name="x">
         match (same two-order approach extractLogoUrl uses for og:image).
         When several tags qualify, the earliest one in the document wins.
       • The identifying attribute may be name= or property= for every
         meta field.
       • Values are read quote-aware, so an apostrophe inside a
         double-quoted value (content="Don't wait") is kept. */
  function extractMetaFromBody(body) {
    if (!body || typeof body !== 'string') return {};

    /* A non-empty quoted attribute value. Two capture groups: [1] for
       double-quoted, [2] for single-quoted. */
    const QUOTED = `(?:"([^"]+)"|'([^']+)')`;

    /* Finds the first <tag> whose keyAttr equals keyValue and returns the
       value of valueAttr, or null. keyValue is interpolated into a regex
       unescaped — callers below only pass literal identifiers. */
    function findAttr(tag, keyAttr, keyValue, valueAttr) {
      const key = `(?:${keyAttr})=["']${keyValue}["']`;
      const val = `${valueAttr}=${QUOTED}`;
      const candidates = [
        body.match(new RegExp(`<${tag}[^>]+${key}[^>]+${val}`, 'i')),   /* key … value */
        body.match(new RegExp(`<${tag}[^>]+${val}[^>]*${key}`, 'i')),   /* value … key */
      ].filter(Boolean);
      if (!candidates.length) return null;
      const first = candidates.reduce((a, b) => (b.index < a.index ? b : a));
      return first[1] ?? first[2];
    }

    const metaContent = (id) => findAttr('meta', 'name|property', id, 'content');

    const titleMatch = body.match(/<title[^>]*>([^<]+)<\/title>/i);
    const langMatch = body.match(/<html[^>]+lang=["']([^"']+)/i);
    const description = metaContent('description');

    /* Listed in the order keys should appear on the result object. */
    const fields = [
      ['title',          titleMatch ? titleMatch[1].slice(0, 200) : null],
      ['description',    description === null ? null : description.slice(0, 500)],
      ['og_title',       metaContent('og:title')],
      ['og_description', metaContent('og:description')],
      ['og_image',       metaContent('og:image')],
      ['og_site_name',   metaContent('og:site_name')],
      ['canonical',      findAttr('link', 'rel', 'canonical', 'href')],
      ['generator',      metaContent('generator')],
      ['keywords',       metaContent('keywords')],
      ['author',         metaContent('author')],
      ['robots',         metaContent('robots')],
      ['lang',           langMatch ? langMatch[1] : null],
      ['twitter_title',  metaContent('twitter:title')],
      ['twitter_desc',   metaContent('twitter:description')],
      ['twitter_image',  metaContent('twitter:image')],
    ];

    const meta = {};
    for (const [key, value] of fields) {
      if (value !== null && value !== undefined) meta[key] = value.trim();
    }
    return meta;
  }

  /* ── extractFormsFromBody (v1.3.8 patch h2) ─────────────────────────
     NOTE: this comment documents extractFormsFromBody, which is defined
     further down in this file, after extractMapsFromBody.

     Walks every <form> tag in the body, classifies its platform, and
     extracts visible form fields. Mirrors the server-side forms walker
     so per-page extraction (via enrichPagesData) covers the same cases.

     Without per-page form detection we miss every contact form that
     lives on /contact/, /contact-us/, /get-in-touch/ — i.e. nearly
     every WordPress agency site.

     Filters:
       - Search forms (role="search", search-form class, etc.)
       - Honeypot inputs (hidden, named honeypot, aria-hidden)
       - Submit/button/image inputs (not user-fillable)
       - Hidden inputs (CSRF tokens, nonces, etc.)

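     Platform classification covers: Gravity, CF7, WPForms, Ninja,
     Fluent, Formidable, Forminator, SureForms, WS Form, MetForm,
     Calculated Fields Form, Elementor, HubSpot, JotForm, Mailchimp,
     ActiveCampaign, Klaviyo; anything else is reported as native. */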
  /* ── extractLinksAndFilesFromBody ─────────────────────────────────────
     Pulls two datasets from fetched page HTML:
       - externalLinks: [{href, text, domain, page_url, section}]
           links whose hostname is neither the site's hostname nor one of
           its subdomains; section is 'header' | 'footer' | 'nav' | 'body'
       - fileLinks: [{href, text, ext, isInternal, page_url}]
           linked documents (.pdf etc), internal or external
     Called by processOne and used to populate:
       - _clientFiles (PDFs, docs across all pages)
       - pg.links / pg.external_links (for integrations detection)

     Params:
       body       — raw page HTML (anything but a non-empty string yields
                    two empty lists)
       pageUrl    — URL of the page; base for relative hrefs and echoed
                    back as page_url
       siteOrigin — site origin; fallback base, and the reference for the
                    internal/external decision. When it is missing or
                    unparsable no external links are reported and every
                    file is marked isInternal: false.

     Behaviour notes:
       • Anchors are matched across line breaks, so multi-line
         <a ...>\n  text\n</a> blocks are included.
       • Each distinct raw href is processed once (first occurrence).
       • #fragment, mailto:, tel: and javascript: hrefs are ignored.
       • isInternal compares URL origins, so a look-alike host such as
         https://example.com.evil.com is NOT treated as internal.
  */
  function extractLinksAndFilesFromBody(body, pageUrl, siteOrigin) {
    if (!body || typeof body !== 'string') return { externalLinks: [], fileLinks: [] };
    const FILE_EXTS = /\.(pdf|docx?|xlsx?|pptx?|zip|rar|txt|rtf|csv|odt|ods|odp)(?:[?#]|$)/i;
    const SKIP_PREFIXES = ['#', 'mailto:', 'tel:', 'javascript:'];
    const externalLinks = [];
    const fileLinks = [];
    const seenHrefs = new Set();

    /* Parse the site origin once instead of once per link. */
    let site = null;
    if (siteOrigin) {
      try { site = new URL(siteOrigin); }
      catch (e) { site = null; }
    }
    const siteDomain = site ? site.hostname.toLowerCase() : '';

    /* Source tagging: classify a link as 'header', 'footer', 'nav', or
       'body' by looking at which section tag was most recently opened
       (and not yet closed) before the link's character position.
       This lets integrations sort header/footer/nav links to the top. */
    function getSection(matchIndex) {
      const before = body.substring(0, matchIndex);
      const isInside = (openTag, closeTag) => {
        const open = before.lastIndexOf(openTag);
        return open >= 0 && open > before.lastIndexOf(closeTag);
      };
      if (isInside('<header', '</header>')) return 'header';
      if (isInside('<footer', '</footer>')) return 'footer';
      if (isInside('<nav', '</nav>')) return 'nav';
      /* Fallback: Elementor/theme class names within the preceding 2000
         characters. */
      const nearContext = body.substring(Math.max(0, matchIndex - 2000), matchIndex);
      if (/class=["'][^"']*(?:nav|menu|header)[^"']*["']/i.test(nearContext)) return 'nav';
      if (/class=["'][^"']*(?:footer|site-footer)[^"']*["']/i.test(nearContext)) return 'footer';
      return 'body';
    }

    /* href + anchor text; [\s\S] (not .) so the text may span lines. */
    const aRe = /<a\b[^>]*\bhref=["']([^"'\s]+)["'][^>]*>([\s\S]*?)<\/a>/gi;
    let m;
    while ((m = aRe.exec(body)) !== null) {
      const href = (m[1] || '').trim();
      if (!href || SKIP_PREFIXES.some((prefix) => href.startsWith(prefix))) continue;
      if (seenHrefs.has(href)) continue;
      seenHrefs.add(href);

      /* Resolve relative URLs; unresolvable hrefs are dropped. */
      let resolved;
      try { resolved = new URL(href, pageUrl || siteOrigin); }
      catch (e) { continue; }
      const fullHref = resolved.href;

      /* Anchor text: tags stripped, whitespace collapsed, max 120 chars. */
      const text = (m[2] || '').replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim().substring(0, 120);

      /* File detection (internal or external) */
      const extMatch = fullHref.match(FILE_EXTS);
      if (extMatch) {
        const isInternal = !!site && resolved.origin !== 'null' && resolved.origin === site.origin;
        fileLinks.push({
          href: fullHref,
          text: text || href.split('/').pop(),
          ext: extMatch[1].toLowerCase(),
          isInternal,
          page_url: pageUrl,
        });
      }

      /* External link detection — subdomains of the site count as
         internal here. */
      const linkDomain = resolved.hostname.toLowerCase();
      if (siteDomain && linkDomain !== siteDomain && !linkDomain.endsWith('.' + siteDomain)) {
        externalLinks.push({
          href: fullHref,
          text,
          domain: linkDomain,
          page_url: pageUrl,
          section: getSection(m.index),
        });
      }
    }
    return { externalLinks, fileLinks };
  }

  /* ── extractMapsFromBody ──────────────────────────────────────────────────
     Extracts embedded maps from page HTML. Detects:
       - Google Maps iframes (maps.google.com/maps?q=... or google.com/maps/embed)
       - Mapbox iframes (mapbox.com)
       - OpenStreetMap iframes (openstreetmap.org)
       - Bing Maps iframes (bing.com/maps)
       - Leaflet (leaflet-container / leaflet-map class signal) — only when
         no iframe map was found
       - Map shortcodes ([map, [leaflet-map, [wpgooglemaps) — only when
         nothing else was found
     Params:
       body    — raw page HTML (anything but a non-empty string yields [])
       pageUrl — echoed back as page_url on every entry
     Returns: [{provider, src, title, address, page_url}]
       src/title/address are null for the Leaflet and shortcode signals;
       address is only populated for Google Maps (q= parameter, else the
       iframe title).

     Iframes are matched across line breaks, and percent-decoding is
     tolerant: a title or q= value containing a stray '%' is kept as-is
     instead of throwing a URIError out of the whole scan. Iframes are
     de-duplicated on the first 80 characters of src.
  */
  function extractMapsFromBody(body, pageUrl) {
    if (!body || typeof body !== 'string') return [];
    const maps = [];
    const seen = new Set();

    /* decodeURIComponent throws on malformed escapes ("100% free");
       fall back to the raw text in that case. */
    function safeDecode(value) {
      try { return decodeURIComponent(value); }
      catch (e) { return value; }
    }

    /* Checked in order; first matching pattern names the provider. */
    const PROVIDERS = [
      [/maps\.google\.com|google\.com\/maps/i, 'Google Maps'],
      [/mapbox\.com/i, 'Mapbox'],
      [/openstreetmap\.org/i, 'OpenStreetMap'],
      [/bing\.com\/maps/i, 'Bing Maps'],
    ];

    /* Map iframes — [\s\S] (not .) so <iframe ...>\n</iframe> matches. */
    const iframeRe = /<iframe\b([^>]*)>([\s\S]*?)<\/iframe>/gi;
    let im;
    while ((im = iframeRe.exec(body)) !== null) {
      const attrs = im[1] || '';
      const srcM = attrs.match(/\bsrc=["']([^"']+)["']/i);
      if (!srcM) continue;
      const src = srcM[1];

      const hit = PROVIDERS.find(([pattern]) => pattern.test(src));
      if (!hit) continue;
      const provider = hit[1];

      const titleM = attrs.match(/\btitle=["']([^"']+)["']/i);
      const title = titleM ? safeDecode(titleM[1]) : null;

      let address = null;
      if (provider === 'Google Maps') {
        /* Extract address from q= param, else fall back to the title */
        const qM = src.match(/[?&]q=([^&]+)/i);
        if (qM) address = safeDecode(qM[1].replace(/\+/g, ' '));
        else if (title) address = title;
      }

      const key = src.substring(0, 80);
      if (seen.has(key)) continue;
      seen.add(key);
      maps.push({ provider, src, title, address, page_url: pageUrl });
    }

    /* Leaflet signal — div with leaflet class but no iframe */
    if (!maps.length && /leaflet-container|leaflet-map/i.test(body)) {
      maps.push({ provider: 'Leaflet Map', src: null, title: null, address: null, page_url: pageUrl });
    }
    /* ACF / plugin map shortcode */
    if (!maps.length && /\[map|\[leaflet-map|\[wpgooglemaps/i.test(body)) {
      maps.push({ provider: 'Map Shortcode', src: null, title: null, address: null, page_url: pageUrl });
    }
    return maps;
  }

  function extractFormsFromBody(body, pageUrl) {
    if (!body || typeof body !== 'string') return [];
    const out = [];
    const formRe = /<form\b([^>]*)>([\s\S]*?)<\/form>/gi;
    let fm;
    let formsSeen = 0;
    while ((fm = formRe.exec(body)) !== null && formsSeen < 30) {
      const attrs = fm[1] || '';
      const inner = fm[2] || '';
      const haystack = (attrs + ' ' + inner.substring(0, 4000)).toLowerCase();
      if (/role=["']search["']/i.test(attrs)) continue;
      if (/wp-block-search|searchform|search-form/i.test(attrs)) continue;
      /* ── Platform detection — sourced from plugin HTML output analysis.
         Priority order: most-specific signatures first. Each platform has
         primary class/id signals + asset path fallback.
         Updated: May 2026 from plugin zip review. */
      let platform = 'native';
      let platformLabel = 'Custom Form';
      /* 1. Gravity Forms — gform_ prefix, gfield class, asset path */
      if (/gform_wrapper|gform_fields|gform_heading|class=["'][^"']*gfield[^"']*["']|id=["']gform_\d/i.test(haystack) ||
          /\/plugins\/gravityforms\//i.test(haystack)) {
        platform = 'gravity'; platformLabel = 'Gravity Forms';
      /* 2. Contact Form 7 — wpcf7 prefix, data-wpcf7-id, asset path */
      } else if (/wpcf7-form|wpcf7-form-control|class=["'][^"']*wpcf7[^"']*["']|data-wpcf7-id/i.test(haystack) ||
                 /\/plugins\/contact-form-7\//i.test(haystack)) {
        platform = 'cf7'; platformLabel = 'Contact Form 7';
      /* 3. WPForms — wpforms-form, wpforms-field prefix */
      } else if (/wpforms-form|class=["'][^"']*wpforms-field[^"']*["']|data-formid=["']\d/i.test(haystack) ||
                 /\/plugins\/wpforms/i.test(haystack)) {
        platform = 'wpforms'; platformLabel = 'WPForms';
      /* 4. Ninja Forms — nf-form-cont, nf-field-container */
      } else if (/nf-form-cont|nf-form-wrap|nf-field-container|class=["'][^"']*nf-field[^"']*["']|ninja-forms-req-symbol/i.test(haystack) ||
                 /\/plugins\/ninja-forms\//i.test(haystack)) {
        platform = 'ninja'; platformLabel = 'Ninja Forms';
      /* 5. Fluent Forms — fluentform id, ff-el- prefix, ff-btn-submit */
      } else if (/fluentform|ff-el-group|ff-el-input|ff-btn-submit|class=["'][^"']*ff-el-[^"']*["']|class=["'][^"']*ff_form/i.test(haystack) ||
                 /\/plugins\/fluentform\//i.test(haystack)) {
        platform = 'fluent'; platformLabel = 'Fluent Forms';
      /* 6. Formidable Forms — frm_form_field, frm-show-form */
      } else if (/frm_form_field|frm-show-form|frm_fields_container|class=["'][^"']*frm_type_[^"']*["']|data-frmval/i.test(haystack) ||
                 /\/plugins\/formidable\//i.test(haystack)) {
        platform = 'formidable'; platformLabel = 'Formidable Forms';
      /* 7. Forminator — forminator-form, forminator-field prefix */
      } else if (/forminator-form|forminator-custom-form|class=["'][^"']*forminator-field[^"']*["']|forminator-button-submit/i.test(haystack) ||
                 /\/plugins\/forminator\//i.test(haystack)) {
        platform = 'forminator'; platformLabel = 'Forminator';
      /* 8. SureForms — srfm-block-wrap, srfm-input-common */
      } else if (/srfm-block-wrap|srfm-input-common|srfm-form-container|class=["'][^"']*srfm-[^"']*["']|data-block-id/i.test(haystack) ||
                 /\/plugins\/sureforms\//i.test(haystack)) {
        platform = 'sureforms'; platformLabel = 'SureForms';
      /* 9. WS Form — wsf-button, ws-form-wrapper, fl-ws-form */
      } else if (/ws-form-wrapper|wsf-button|fl-ws-form|class=["'][^"']*ws-form-[^"']*["']|data-wsf-/i.test(haystack) ||
                 /\/plugins\/ws-form\//i.test(haystack)) {
        platform = 'wsform'; platformLabel = 'WS Form';
      /* 10. MetForm — mf-form-shortcode, mf-input-wrapper */
      } else if (/mf-form-shortcode|mf-input-wrapper|mf_form_wrapper|class=["'][^"']*mf-form[^"']*["']|class=["'][^"']*metform-[^"']*["']|data-form-id/i.test(haystack) ||
                 /\/plugins\/metform\//i.test(haystack)) {
        platform = 'metform'; platformLabel = 'MetForm';
      /* 11. Calculated Fields Form — cff-form, cff_form_builder, data-nonce+CP_ */
      } else if (/cff-form|cff_form_builder|CP_CALCULATEDFIELDSF|class=["'][^"']*cff-[^"']*["']|data-cff-/i.test(haystack) ||
                 /\/plugins\/calculated-fields-form\//i.test(haystack)) {
        platform = 'calc'; platformLabel = 'Calculated Fields Form';
      /* 12. Elementor Forms — elementor-form, elementor-field-group */
      } else if (/elementor-form|class=["'][^"']*elementor-field-group[^"']*["']|elementor-field-type-/i.test(haystack)) {
        platform = 'elementor'; platformLabel = 'Elementor Forms';
      /* 13. HubSpot — hbspt-form, hs-form, hs-button */
      } else if (/hbspt-form|class=["'][^"']*hs-form[^"']*["']|hs-button|hs-fieldtype-/i.test(haystack)) {
        platform = 'hubspot'; platformLabel = 'HubSpot Form';
      /* 14. JotForm — jotform embed, jotform class prefix */
      } else if (/jotform|class=["'][^"']*jf-[^"']*["']|jotformEmbedding/i.test(haystack) ||
                 /jotform\.com/i.test(haystack)) {
        platform = 'jotform'; platformLabel = 'JotForm';
      /* 15. Mailchimp / MC4WP */
      } else if (/mc4wp|mc-embedded|class=["'][^"']*mc4wp-form[^"']*["']|mailchimp-subscribe/i.test(haystack)) {
        platform = 'mailchimp'; platformLabel = 'Mailchimp / MC4WP';
      /* 16. ActiveCampaign forms */
      } else if (/activecampaign|class=["'][^"']*_form_[^"']*["']|_form_submit/i.test(haystack) ||
                 /activehosted\.com/i.test(haystack)) {
        platform = 'activecampaign'; platformLabel = 'ActiveCampaign';
      /* 17. Klaviyo */
      } else if (/klaviyo-form|class=["'][^"']*klaviyo-[^"']*["']|klaviyoForm/i.test(haystack)) {
        platform = 'klaviyo'; platformLabel = 'Klaviyo';
      }
      const actionMatch = attrs.match(/\baction=["']([^"']*)["']/i);
      const methodMatch = attrs.match(/\bmethod=["']([^"']*)["']/i);
      const idMatch = attrs.match(/\bid=["']([^"']+)["']/i);

      /* v1.3.8 patch h6: label extraction.
         Pre-build a label map BEFORE walking inputs. Three strategies:

         1. <label for="<input_id>"> Label text </label>
            — Most reliable. Both Gravity, CF7, WPForms, native HTML use it.
            Maps input id → cleaned label text.

         2. <fieldset>...<legend>Label</legend>...<input name="x"/>...</fieldset>
            — Used by composite fields (name with first/last, consent, etc.)
            All inputs inside the same fieldset share the legend as label.

         3. <label> wrapper without `for` attribute, where the input lives
            INSIDE the label tag.
            — Less common but valid HTML.

         The map is keyed by both input id AND name so the field walker
         can look up by either. Cleanup strips inner <span class="gfield_required">
         and similar nested markup, collapses whitespace, and trims. */
      const labelMap = {};   /* input id or name → label text */
      const cleanLabel = (raw) => {
        return String(raw || '')
          .replace(/<[^>]+>/g, ' ')   /* strip any nested HTML */
          .replace(/&nbsp;/gi, ' ')
          .replace(/&amp;/gi, '&')
          .replace(/&quot;/gi, '"')
          .replace(/&#039;|&apos;/gi, "'")
          .replace(/&lt;/gi, '<')
          .replace(/&gt;/gi, '>')
          .replace(/\*/g, '')          /* strip required-asterisk text */
          .replace(/\s+/g, ' ')
          .trim();
      };

      /* Strategy 1: <label for="..."> */
      const labelForRe = /<label\b[^>]*\bfor=["']([^"']+)["'][^>]*>([\s\S]*?)<\/label>/gi;
      let lm;
      while ((lm = labelForRe.exec(inner)) !== null) {
        const cleaned = cleanLabel(lm[2]);
        if (cleaned && cleaned.length <= 200) {
          /* ID could collide with name in some sites. Set both — name lookup
             uses normalized id pattern (input_<formid>_<fieldid>), name uses
             input_<n>. The walker will try both. */
          labelMap['#' + lm[1]] = cleaned;
        }
      }

      /* Strategy 2: <fieldset><legend>...</legend>...<input>...</fieldset>
         Walk every fieldset, capture its legend, then walk inputs inside
         and tag them all with the legend text. Falls back gracefully if
         the field has its own <label for=> (strategy 1 wins, set first). */
      const fieldsetRe = /<fieldset\b[^>]*>([\s\S]*?)<\/fieldset>/gi;
      let fsm;
      while ((fsm = fieldsetRe.exec(inner)) !== null) {
        const fsInner = fsm[1];
        const legendM = fsInner.match(/<legend\b[^>]*>([\s\S]*?)<\/legend>/i);
        if (!legendM) continue;
        const legendText = cleanLabel(legendM[1]);
        if (!legendText || legendText.length > 200) continue;
        const innerInputRe = /<(?:input|select|textarea)\b[^>]*\bname=["']([^"']+)["']/gi;
        let iim;
        while ((iim = innerInputRe.exec(fsInner)) !== null) {
          const nameKey = '@' + iim[1];
          if (!labelMap[nameKey]) labelMap[nameKey] = legendText;
        }
      }

      /* Strategy 3: <label> wraps input WITHOUT for= attribute.
         CF7 pattern: <label> Label Text<br><span ...><input name="x"/></span></label>
         Also handles WPForms/native HTML wrapper labels.
         Extract: text node before <br> or before first child element.
         Key: input's name attribute (from span data-name or input name). */
      const labelWrapRe = /<label\b([^>]*)>([\s\S]*?)<\/label>/gi;
      let lwm;
      while ((lwm = labelWrapRe.exec(inner)) !== null) {
        const lAttrs = lwm[1] || '';
        const lInner = lwm[2] || '';
        /* Skip if already handled by for= strategy */
        if (/\bfor=["']/i.test(lAttrs)) continue;
        /* Extract label text: everything before <br>, <span>, or <input> */
        const textBeforeChild = lInner.split(/<(?:br|span|input|select|textarea)\b/i)[0];
        const labelText = cleanLabel(textBeforeChild);
        if (!labelText || labelText.length > 200 || labelText.length < 1) continue;
        /* Strategy 3a: CF7 uses data-name on wpcf7-form-control-wrap span */
        const dataNamesM = lInner.match(/data-name=["']([^"']+)["']/gi) || [];
        dataNamesM.forEach(function(dn) {
          const nm = dn.match(/data-name=["']([^"']+)["']/i);
          if (nm) {
            const key = '@' + nm[1];
            if (!labelMap[key]) labelMap[key] = labelText;
          }
        });
        /* Strategy 3b: find input name directly inside the label */
        const inputInLabel = lInner.match(/<(?:input|select|textarea)\b[^>]*\bname=["']([^"']+)["']/i);
        if (inputInLabel) {
          const key = '@' + inputInLabel[1];
          if (!labelMap[key]) labelMap[key] = labelText;
        }
        /* Strategy 3c: find input id inside the label */
        const inputIdInLabel = lInner.match(/<(?:input|select|textarea)\b[^>]*\bid=["']([^"']+)["']/i);
        if (inputIdInLabel) {
          const key = '#' + inputIdInLabel[1];
          if (!labelMap[key]) labelMap[key] = labelText;
        }
      }

      const fields = [];
      const fieldRe = /<(input|select|textarea)\b([^>]*)>/gi;
      let ff;
      while ((ff = fieldRe.exec(inner)) !== null && fields.length < 30) {
        const tag = ff[1].toLowerCase();
        const fattrs = ff[2] || '';
        const nameM = fattrs.match(/\bname=["']([^"']+)["']/i);
        if (!nameM) continue;
        const typeM = fattrs.match(/\btype=["']([^"']+)["']/i);
        const placeM = fattrs.match(/\bplaceholder=["']([^"']+)["']/i);
        const idAttrM = fattrs.match(/\bid=["']([^"']+)["']/i);
        const ariaLabelM = fattrs.match(/\baria-label=["']([^"']+)["']/i);
        const reqM = /\brequired\b|aria-required=["']true["']/i.test(fattrs);
        let type = tag === 'input' ? (typeM ? typeM[1] : 'text') : tag;
        if (type === 'hidden') continue;
        if (type === 'submit' || type === 'button' || type === 'image') continue;
        if (/honeypot|gform_validation_container|akismet|ak_hp/i.test(fattrs)) continue;

        /* Resolve label: <label for=...> takes priority over fieldset
           legend, which beats aria-label and placeholder. Each is
           progressively less reliable. */
        let label = null;
        if (idAttrM && labelMap['#' + idAttrM[1]]) label = labelMap['#' + idAttrM[1]];
        else if (labelMap['@' + nameM[1]]) label = labelMap['@' + nameM[1]];
        else if (ariaLabelM) label = cleanLabel(ariaLabelM[1]);
        else if (placeM) label = cleanLabel(placeM[1]);

        fields.push({
          name: nameM[1],
          type: type,
          required: reqM,
          placeholder: placeM ? placeM[1] : null,
          label: label || null,
        });
      }
      out.push({
        id: idMatch ? idMatch[1] : null,
        page_url: pageUrl,
        action: actionMatch ? actionMatch[1] : null,
        method: methodMatch ? methodMatch[1].toLowerCase() : 'post',
        platform: platform,
        platform_label: platformLabel,
        field_count: fields.length,
        fields: fields,
      });
      formsSeen++;
    }
    return out;
  }

  /* ── extractHeadingsFromBody (ported from v1 scanner.html line 836) ─────
     Walks <h1>-<h4> tags, strips inner markup, returns [{level, text}]. */
  /* Params:  body — raw HTML string of a page; anything else yields [].
     Returns: up to 40 entries of { level, text } in document order.
       • level is always lowercase ('h1'..'h4') so callers can compare
         against 'h1' even when the source markup is written as <H1>.
       • text has nested tags removed and whitespace runs collapsed.
     A heading is only matched when its closing tag appears within 300
     characters of the opening tag; headings with no text are skipped. */
  function extractHeadingsFromBody(body) {
    if (!body || typeof body !== 'string') return [];
    const headings = [];
    const re = /<(h[1-4])[^>]*>([\s\S]{1,300}?)<\/(h[1-4])>/gi;
    let m;
    while ((m = re.exec(body)) !== null && headings.length < 40) {
      const text = m[2]
        .replace(/<[^>]+>/g, '')
        .replace(/\s+/g, ' ')   /* multi-line headings → single line */
        .trim();
      /* The regex is case-insensitive, so normalise the captured tag name. */
      if (text) headings.push({ level: m[1].toLowerCase(), text });
    }
    return headings;
  }


  /* ── extractContentFromBody (ported from v1 scanner.html line 962) ──────
     Extracts narrative text + structured table data. Two-pass:
       Pass 1: <span class="text"> blocks (GoDaddy SiteBuilder convention).
               If that yields fewer than 2 sections, falls back to <p> text,
               and if there are still fewer than 2, to <li> items.
       Pass 2: Data tables (tables with <th> or 3+ multi-column rows)
     Returns a single string. Tables are separated with [TABLE DATA] markers
     so the render layer can pretty-print them. */
  /* Params:  body — raw HTML string of a page; anything else yields ''.
     Returns: one string; sections are joined by a blank line and each data
              table is emitted as "[TABLE DATA]\n" followed by one
              " | "-joined line per row.

     Notes on correctness:
       • Entities are decoded AFTER tags are stripped. Decoding first turns
         escaped text such as "5 &lt; 10 and 20 &gt; 3" into something that
         looks like a tag and gets deleted. &amp; is decoded last so that
         "&amp;lt;" becomes the literal text "&lt;" rather than "<".
       • Dedupe uses a Set: a plain object treats text like "constructor"
         or "toString" as already seen because of inherited properties.
       • <p\b / <li\b keep the paragraph and list passes from matching
         <pre>, <picture>, <path>, <link>, etc. */
  function extractContentFromBody(body) {
    if (!body || typeof body !== 'string') return '';

    // Strip noise (comments, scripts, styles); entities stay encoded for now.
    const markup = body
      .replace(/<!--[\s\S]*?-->/g, '')
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');

    // Fragment of markup → plain text: drop tags, decode entities, collapse whitespace.
    const toText = (fragment) => fragment
      .replace(/<[^>]+>/g, ' ')
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&amp;/g, '&')
      .replace(/\s+/g, ' ')
      .trim();

    const sections = [];
    const seenSection = new Set();

    // ── Pass 1a: SiteBuilder-style <span class="text">...</span> ─────
    const spanRe = /<span[^>]+class=["'][^"']*\btext\b[^"']*["'][^>]*>([\s\S]{3,5000}?)<\/span>/gi;
    let sm;
    while ((sm = spanRe.exec(markup)) !== null) {
      const t = toText(sm[1]);
      if (t.length > 3 && !seenSection.has(t)) {
        seenSection.add(t);
        sections.push(t);
      }
    }

    // ── Pass 1b: Generic <p> fallback (modern CMS pages) ─────────────
    if (sections.length < 2) {
      const pRe = /<p\b[^>]*>([\s\S]{3,5000}?)<\/p>/gi;
      let pm;
      while ((pm = pRe.exec(markup)) !== null && sections.length < 30) {
        const t = toText(pm[1]);
        if (t.length > 20 && !seenSection.has(t)) {
          seenSection.add(t);
          sections.push(t);
        }
      }
    }

    // ── Pass 1c: <li> extraction for listy pages ─────────────────────
    if (sections.length < 2) {
      const liRe = /<li\b[^>]*>([\s\S]{3,800}?)<\/li>/gi;
      let lm;
      while ((lm = liRe.exec(markup)) !== null && sections.length < 40) {
        const t = toText(lm[1]);
        if (t.length > 5 && !seenSection.has(t)) {
          seenSection.add(t);
          sections.push('• ' + t);
        }
      }
    }

    // ── Pass 2: Data tables ───────────────────────────────────────
    const tableRe = /<table[^>]*>([\s\S]*?)<\/table>/gi;
    let tm;
    while ((tm = tableRe.exec(markup)) !== null) {
      const tableHTML = tm[1];
      const hasTH = /<th[\s>]/i.test(tableHTML);
      const rows = [];
      const rowRe = /<tr[^>]*>([\s\S]*?)<\/tr>/gi;
      let rm;
      while ((rm = rowRe.exec(tableHTML)) !== null) {
        const cells = [];
        const cellRe = /<t[dh][^>]*>([\s\S]*?)<\/t[dh]>/gi;
        let cm;
        while ((cm = cellRe.exec(rm[1])) !== null) {
          const ct = toText(cm[1]);
          if (ct.length > 0) cells.push(ct);
        }
        // Single-cell rows are layout/spacer rows, not data.
        if (cells.length >= 2) rows.push(cells.join(' | '));
      }
      // A table counts as data if it has header cells, or at least 3 multi-column rows.
      if ((hasTH && rows.length >= 1) || rows.length >= 3) {
        // Skip tables whose first cell already appears in extracted narrative text.
        const firstCell = rows[0].split('|')[0].trim();
        const alreadyCovered = sections.some(s => s.indexOf(firstCell) >= 0);
        if (!alreadyCovered) {
          sections.push('[TABLE DATA]\n' + rows.join('\n'));
        }
      }
    }

    return sections.join('\n\n');
  }

  /* ── enrichPagesData — per-page content extraction via Railway fetch ──
     Walks discovered URLs, fetches each via /brain/scan/test, parses
     meta/headings/content/images/forms client-side. Progress callback
     fires once per fetched page so the UI's progress bar can update.

     v1.3.8 patch h8: rate-limit safety.
       • Concurrency dropped 8 → 3. Railway's per-IP global rate limit
         (500 req / 15min) was getting depleted on sites with many WP
         REST custom-post-types (CD has 80+ elementor_library design
         templates that the scanner enumerated as "pages"), causing
         downstream brand+favicon fetches to also 429-fail.
       • Junk WP CPT filter: elementor_library, e-floating-buttons,
         nav_menu_item, wp_block, attachment, revision URLs are filtered
         out as they're not user-facing pages.
       • Aggregate-listing pages (?elementor_library=, /category/, /tag/,
         /author/) also excluded from per-page enrichment.
       • 429 retry: per-fetch backoff, single retry after 1.5s pause.

     v1.3.8 patch h3 raised the cap to 300, but most sites have <100
     real pages. The cap is a ceiling, not a target. */
  /* Hard ceiling on pages fetched per scan. enrichPagesData's options.cap
     can lower this (it is clamped with Math.min) but never raise it. */
  const PAGE_FETCH_CAP = 300;
  /* Number of page fetches issued in parallel per chunk. */
  const PAGE_FETCH_CONCURRENCY = 3;
  /* Pause in milliseconds (passed to setTimeout) before the single retry
     of a request that came back 429. */
  const PAGE_FETCH_RETRY_429_DELAY = 1500;

  /* ── extractLocationsFromPage ─────────────────────────────────────────────
     Detects and groups multi-location contact info from a contact page.
     Strategy:
       1. Find known city names (from rawScan's city_state_zip data) in the HTML
       2. Split the HTML into location blocks at those boundaries
       3. Extract address, phones (with labels), emails per block
          (hours are not parsed yet — the `hours` field is always null)
       4. Return array of structured location objects

     Works with: Elementor column layouts, Bootstrap grid sections,
     WordPress theme contact pages, Gravity Forms location repeaters.
     Falls back gracefully — returns [] if no multi-location structure found.
  */
  /* Params:
       html               — raw HTML string of a (contact) page.
       knownCityStateZips — array of "City, ST ZIP" strings; at least two
                            usable entries are required, otherwise [].
     Returns: array of
       { name, city_state_zip, address, phones: [{number, label, digits}],
         emails: [string], hours: null, is_primary }
     in document order. Blocks with neither a phone nor an address are
     dropped. is_primary marks the first city boundary found in the page. */
  function extractLocationsFromPage(html, knownCityStateZips) {
    if (!html || typeof html !== 'string') return [];
    if (!Array.isArray(knownCityStateZips) || knownCityStateZips.length < 2) return [];

    // Strip scripts/styles but keep structural HTML for block detection
    const clean = html
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');

    // Pair each city name with the exact "City, ST ZIP" it came from, so two
    // entries sharing a prefix (or the same city) keep their own csz string.
    const cityEntries = knownCityStateZips
      .filter(function(csz) { return typeof csz === 'string'; })
      .map(function(csz) {
        const trimmed = csz.trim();
        return { city: (trimmed.split(',')[0] || '').trim(), csz: trimmed };
      })
      .filter(function(entry) { return entry.city.length > 1; });

    if (cityEntries.length < 2) return [];

    /* Find the first position of each city name in the cleaned HTML.
       The name must sit at the start of an element's text (directly after a
       tag, optionally separated by whitespace) or at the start of the input,
       and must not be followed by another letter — so "Chapman" does not
       match inside "Chapmanville".
       NOTE: the pattern is assembled from string literals, so every regex
       backslash has to be doubled ('\\s', '\\.'); a single backslash is
       silently dropped by the string parser and the pattern stops allowing
       whitespace between the tag and the city name. */
    const boundaries = [];
    cityEntries.forEach(function(entry) {
      const escapedCity = entry.city.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const re = new RegExp(
        '(?:<[^>]*>\\s*|^|>\\s*)' + escapedCity + '(?:\\s*<|\\.?\\s*$|[^a-zA-Z])',
        'i'
      );
      const m = re.exec(clean);
      if (m) boundaries.push({ city: entry.city, idx: m.index, csz: entry.csz });
    });

    if (boundaries.length < 2) return [];
    boundaries.sort(function(a, b) { return a.idx - b.idx; });

    const US_PHONE_RE = /\(?([2-9]\d{2})\)?[\s.\-]?(\d{3})[\s.\-]?(\d{4})/g;
    /* tel: hrefs may carry visual separators (tel:+1-555-123-4567,
       tel:(555) 123.4567); accept them here and reduce to digits below. */
    const TEL_ANCHOR_RE = /<a[^>]+href=["']tel:([+\d\s().\-]+)["'][^>]*>([^<]{0,80})<\/a>/gi;
    const EMAIL_RE = /[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}/g;
    const ADDR_RE = /\b(\d{1,6}\s+[A-Za-z0-9\s.]{3,60}?(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Drive|Dr|Lane|Ln|Way|Circle|Cir|Court|Ct|Place|Pl|Pkwy|Hwy)[^,\n]{0,30})/i;

    const locations = [];

    boundaries.forEach(function(b, i) {
      // A block runs from this city's boundary to the next one (capped at 8000 chars).
      const start = b.idx;
      const end = boundaries[i + 1] ? boundaries[i + 1].idx : clean.length;
      const block = clean.substring(start, Math.min(end, start + 8000));
      const blockText = block.replace(/<[^>]+>/g, ' ').replace(/&nbsp;/g, ' ').replace(/\s+/g, ' ').trim();

      const loc = {
        name: b.city,
        city_state_zip: b.csz || b.city,
        address: null,
        phones: [],
        emails: [],
        hours: null,
        is_primary: i === 0,
      };

      // Extract address
      const addrM = ADDR_RE.exec(blockText);
      if (addrM) {
        loc.address = addrM[1].replace(/\s+/g, ' ').trim();
        // Append city/state/zip if not already included
        if (b.csz && !loc.address.includes(b.city)) {
          loc.address += ', ' + b.csz;
        }
      } else if (b.csz) {
        loc.address = b.csz;
      }

      // Extract emails (global regex is shared across blocks → reset lastIndex)
      let em;
      const emailsSeen = new Set();
      EMAIL_RE.lastIndex = 0;
      while ((em = EMAIL_RE.exec(blockText)) !== null) {
        if (!emailsSeen.has(em[0])) { emailsSeen.add(em[0]); loc.emails.push(em[0]); }
      }

      // Extract phones from tel: anchors first (most reliable — anchor text is the label)
      const phoneSeen = new Set();
      let tm;
      TEL_ANCHOR_RE.lastIndex = 0;
      while ((tm = TEL_ANCHOR_RE.exec(block)) !== null) {
        const digits = (tm[1] || '').replace(/\D/g, '');
        const key = digits.slice(-10);   // drop any country-code prefix
        const rawLabel = (tm[2] || '').replace(/<[^>]+>/g, '').replace(/\s+/g, ' ').trim();
        // If the anchor text is just the number, look backward in the block for a label
        const label = /^[\d\s.()\-+]+$/.test(rawLabel) ? inferPhoneLabel(rawLabel, block, tm.index) : rawLabel;
        if (key.length >= 7 && !phoneSeen.has(key)) {
          phoneSeen.add(key);
          loc.phones.push({ number: formatPhone(key), label: label || 'Phone', digits: key });
        }
      }
      // Fallback: plain-text US phone patterns
      if (loc.phones.length === 0) {
        US_PHONE_RE.lastIndex = 0;
        let pm;
        while ((pm = US_PHONE_RE.exec(blockText)) !== null && loc.phones.length < 5) {
          const key = (pm[1] + pm[2] + pm[3]);
          if (!phoneSeen.has(key)) {
            phoneSeen.add(key);
            // Label = trailing words (optionally ending in ':') in the 60 chars before the number
            const pre = blockText.substring(Math.max(0, pm.index - 60), pm.index);
            const lm = pre.match(/([A-Za-z][A-Za-z\s]{1,30}?)\s*:?\s*$/);
            loc.phones.push({ number: '(' + pm[1] + ') ' + pm[2] + '-' + pm[3], label: lm ? lm[1].trim() : 'Phone', digits: key });
          }
        }
      }

      locations.push(loc);
    });

    return locations.filter(function(l) { return l.phones.length > 0 || l.address; });
  }

  /* Guess a label for a phone number from the text that precedes it.
     Params:
       numText   — the number's own text (not consulted).
       blockHtml — HTML of the location block containing the number.
       matchIdx  — offset of the number's match inside blockHtml.
     Returns: the trailing run of letters/spaces (optionally followed by a
     colon) found in the up-to-200 characters before matchIdx once tags are
     stripped, or 'Phone' when there is none. */
  function inferPhoneLabel(numText, blockHtml, matchIdx) {
    const LOOKBACK_CHARS = 200;
    const windowStart = Math.max(0, matchIdx - LOOKBACK_CHARS);
    const precedingText = blockHtml
      .substring(windowStart, matchIdx)
      .replace(/<[^>]+>/g, ' ')
      .replace(/\s+/g, ' ')
      .trim();
    const trailingWords = /([A-Za-z][A-Za-z\s]{1,30}?)\s*:?\s*$/.exec(precedingText);
    if (trailingWords === null) return 'Phone';
    return trailingWords[1].trim();
  }

  /* Render a 10-digit string as "(AAA) PPP-LLLL".
     Input of any other length is returned unchanged. */
  function formatPhone(digits10) {
    if (digits10.length !== 10) return digits10;
    const areaCode = digits10.slice(0, 3);
    const exchange = digits10.slice(3, 6);
    const lineNumber = digits10.slice(6);
    return `(${areaCode}) ${exchange}-${lineNumber}`;
  }

  /* Params:
       rawScan    — scan result object; mutated in place and returned.
                    Reads rawScan.urls (strings or {url}/{href} objects).
       siteUrl    — URL of the scanned site; its origin is passed to the
                    image/link extractors and used to resolve URL paths.
       onProgress — optional callback, called once per candidate URL with
                    { index, total, url, label } after its chunk finishes.
       options    — optional { cap: number } limiting how many pages are
                    fetched (clamped to PAGE_FETCH_CAP).
     Returns: Promise resolving to rawScan with _clientPages, _clientForms,
              _clientFiles, _clientMaps, _pageFetchLimited and
              _pageFetchTotal attached (and _clientLocations when a
              multi-location contact page is detected).
     Individual page failures are logged and skipped; they never reject. */
  async function enrichPagesData(rawScan, siteUrl, onProgress, options) {
    if (!rawScan || !siteUrl) return rawScan;
    const urls = Array.isArray(rawScan.urls) ? rawScan.urls : [];
    if (urls.length === 0) {
      rawScan._clientPages = [];
      rawScan._pageFetchLimited = false;
      return rawScan;
    }

    /* v1.3.8 patch h3: cap can be overridden via options.cap. Default
       is PAGE_FETCH_CAP (300). When the configurator's "Multi-page"
       toggle is OFF, the caller passes cap=1 → homepage-only walk. */
    const effectiveCap = (options && typeof options.cap === 'number' && options.cap > 0)
      ? Math.min(options.cap, PAGE_FETCH_CAP)
      : PAGE_FETCH_CAP;

    // Skip non-HTML endpoints
    const skipExts = /\.(pdf|jpg|jpeg|png|gif|svg|webp|ico|zip|doc|docx|xls|xlsx|mp4|mp3)(?:\?|$)/i;

    /* v1.3.8 patch h8: WordPress CPT junk-URL filter.
       The WP REST API's /wp/v2/types endpoint returns ALL custom post
       types, including internal infrastructure ones that aren't real
       pages (Elementor templates, reusable blocks, attachments…).
       Fetching each one burns Railway's per-IP rate limit
       (500 req / 15min) and makes later requests — including the
       brand-data fetch — come back 429.

       Patterns dropped:
         - ?elementor_library=     (Elementor's design template CPT)
         - ?e-floating-buttons=    (Elementor's floating CTA CPT)
         - ?wp_block=, ?reusable=  (Gutenberg reusable blocks)
         - /attachment/, ?attachment_id=, /?p=N (media + raw post IDs)
         - /wp-json/, /wp-admin/, /wp-content/  (admin + asset paths)
         - /feed/, /?feed=         (RSS endpoints)
         - /xmlrpc.php             (XML-RPC endpoint)
         - Pagination tails: /page/2/, /page/3/  */
    const junkUrlPatterns = [
      /[?&]elementor_library=/i,
      /[?&]e-floating-buttons=/i,
      /[?&]wp_block=/i,
      /[?&]reusable=/i,
      /[?&]attachment_id=/i,
      /[?&]p=\d+(?:&|$)/i,
      /\/attachment\//i,
      /\/wp-json\//i,
      /\/wp-admin\//i,
      /\/wp-content\//i,
      /\/feed\/?$/i,
      /[?&]feed=/i,
      /\/xmlrpc\.php/i,
      /\/page\/\d+\/?$/i,
    ];
    function isJunkUrl(u) {
      return junkUrlPatterns.some(re => re.test(u));
    }

    /* Normalize one urls[] entry to a string. Entries may be plain strings
       or objects carrying url/href; anything else (null, numbers, objects
       without a string url) becomes '' and is filtered out instead of
       throwing and rejecting the whole enrichment. */
    const toUrlString = (u) => {
      if (typeof u === 'string') return u;
      const candidate = u?.url || u?.href || '';
      return typeof candidate === 'string' ? candidate : '';
    };

    /* Dedupe BEFORE applying the cap: a repeated URL would be dropped by the
       content hash anyway, but only after spending a rate-limited request. */
    const candidateUrls = [...new Set(
      urls
        .map(toUrlString)
        .filter(u => u && !skipExts.test(u) && !isJunkUrl(u))
    )].slice(0, effectiveCap);

    const origin = (() => {
      try { return new URL(siteUrl).origin; } catch { return ''; }
    })();

    /* Last non-empty path segment of a URL, ignoring query string, hash and
       trailing slash. 'https://x.com/about/' → 'about', 'https://x.com/' → ''.
       (url.split('/').pop() returns '' for every trailing-slash URL, which
       made every such page report as "Home".) */
    function lastPathSegment(u) {
      let path;
      try {
        path = new URL(u, origin || undefined).pathname;
      } catch {
        path = String(u).split(/[?#]/)[0];
      }
      const parts = path.split('/').filter(Boolean);
      const segment = parts.length > 0 ? parts[parts.length - 1] : '';
      try { return decodeURIComponent(segment); } catch { return segment; }
    }

    const pages = [];
    const contentHashes = {};  // dedupe near-identical pages
    /* Accumulators — everything found across every fetched page. */
    const allForms = [];
    const allFiles = [];  /* PDFs + docs found across all fetched pages */
    const allMaps  = [];  /* Embedded maps found across all fetched pages */
    let progressIdx = 0;

    /* v1.3.8 patch h8: 429 retry. Single retry after a fixed delay so
       bursty rate-limit hits don't cause silent page-content drops.
       If the retry is rate-limited again the caller logs and skips the
       URL — better to lose 1 page than the whole scan. */
    async function fetchWithRetry(testUrl) {
      let res = await fetch(testUrl);
      if (res.status === 429) {
        await new Promise(r => setTimeout(r, PAGE_FETCH_RETRY_PAUSE()));
        res = await fetch(testUrl);
      }
      return res;
    }
    function PAGE_FETCH_RETRY_PAUSE() { return PAGE_FETCH_RETRY_429_DELAY; }

    /* Process a single URL — fetch, parse, dedupe.
       Returns the page object on success, null on dedupe/failure. */
    async function processOne(url, originalIdx) {
      try {
        const res = await fetchWithRetry(RAILWAY_URL + '/brain/scan/test?url=' + encodeURIComponent(url));
        if (!res.ok) {
          if (res.status === 429) {
            console.warn('[WPSB Scanner] Rate-limited on ' + url + ' (skipped after retry)');
          }
          return null;
        }
        const data = await res.json().catch(() => ({}));
        const body = data.body_preview || '';
        if (!body || body.length < 50) return null;

        /* Dedupe: hash content AFTER boilerplate offset. SiteBuilder-era sites
           have identical navbar/header markup in the first ~2000 chars of every
           page — hashing from the start would merge distinct pages.
           Skip first 2000 chars then use the next 2000 as the page signature.
           No await sits between the lookup and the write, so parallel pages
           in the same chunk cannot race on contentHashes. */
        const bodyStripped = body.replace(/\s+/g, ' ');
        const hashKey = bodyStripped.length > 4000
          ? bodyStripped.substring(2000, 4000)
          : bodyStripped.substring(0, 2200);
        if (contentHashes[hashKey]) return null;
        contentHashes[hashKey] = 1;

        const meta = extractMetaFromBody(body);
        const headings = extractHeadingsFromBody(body);
        const content = extractContentFromBody(body);
        const images = extractImagesFromBody(body, origin);
        /* Form extraction per page */
        const pageForms = extractFormsFromBody(body, url);
        for (const f of pageForms) allForms.push(f);
        /* Link + file extraction per page — feeds _clientFiles and integrations */
        const { externalLinks, fileLinks } = extractLinksAndFilesFromBody(body, url, origin);
        for (const f of fileLinks) allFiles.push(f);
        /* Map extraction per page */
        const pageMaps = extractMapsFromBody(body, url);
        for (const m of pageMaps) allMaps.push(m);

        /* Location extraction — runs on contact pages only to detect multi-location structure.
           First contact page that yields blocks wins. */
        if (/\/contact/i.test(url)) {
          const knownCSZ = Array.isArray(rawScan.contact && rawScan.contact.city_state_zip)
            ? rawScan.contact.city_state_zip
            : Array.isArray(rawScan._pendingCityStateZips) ? rawScan._pendingCityStateZips : [];
          if (knownCSZ.length >= 2) {
            const locBlocks = extractLocationsFromPage(body, knownCSZ);
            if (locBlocks.length > 0 && !rawScan._clientLocations) {
              rawScan._clientLocations = locBlocks;
            }
          }
        }

        /* Label priority: 'Home' for the first candidate → short <h1> →
           title-cased URL slug → truncated <title> → 'Page N'. */
        const label = (() => {
          if (originalIdx === 0) return 'Home';
          const h1 = headings.find(h => String(h.level).toLowerCase() === 'h1');
          if (h1 && h1.text.length < 40) return h1.text;
          const slug = lastPathSegment(url);
          if (slug) return slug.replace(/\.html?$/i, '').replace(/[-_]+/g, ' ').replace(/\b\w/g, c => c.toUpperCase());
          return meta.title ? meta.title.substring(0, 30) : 'Page ' + (originalIdx + 1);
        })();

        return {
          url,
          label,
          menu_name: label,
          title: meta.title || meta.og_title || '',
          description: meta.description || meta.og_description || '',
          meta,
          headings,
          content,
          images,
          forms: pageForms,
          content_length: content.length,
          image_count: images.length,
          heading_count: headings.length,
          form_count: pageForms.length,
          external_links: externalLinks,
          file_links: fileLinks,
        };
      } catch (e) {
        console.warn('[WPSB Scanner] Page fetch failed for ' + url + ':', e.message);
        return null;
      }
    }

    /* Chunked-parallel walker. Splits candidateUrls into groups of
       PAGE_FETCH_CONCURRENCY and runs each group in parallel via
       Promise.all(). Progress callbacks fire after each chunk completes
       so the UI sees real-time updates. */
    for (let i = 0; i < candidateUrls.length; i += PAGE_FETCH_CONCURRENCY) {
      const chunk = candidateUrls.slice(i, i + PAGE_FETCH_CONCURRENCY);
      const results = await Promise.all(chunk.map((url, j) => processOne(url, i + j)));
      for (let j = 0; j < results.length; j++) {
        if (results[j]) pages.push(results[j]);
        progressIdx++;
        if (typeof onProgress === 'function') {
          const url = chunk[j];
          onProgress({
            index: progressIdx,
            total: candidateUrls.length,
            url,
            label: (lastPathSegment(url) || 'Home').replace(/\.html?$/i, '') || 'Home',
          });
        }
      }
    }

    rawScan._clientPages = pages;
    rawScan._clientForms = allForms;  /* aggregated across all fetched pages */
    rawScan._clientFiles = allFiles;  /* PDFs + docs found across all fetched pages */
    rawScan._clientMaps  = allMaps;   /* Embedded maps found across all fetched pages */
    /* NOTE(review): both values count the raw urls[] list (junk and duplicate
       entries included), not the filtered candidates — confirm that is what
       the UI expects before changing. */
    rawScan._pageFetchLimited = urls.length > PAGE_FETCH_CAP;
    rawScan._pageFetchTotal = urls.length;
    return rawScan;
  }

  /* ── enrichDnsData — post-scan DNS record fetch via /brain/dns ──────
     The scan pulls site content. DNS records are a separate Node DNS
     resolution step at /brain/dns?host={hostname}. We call it once per
     scan and attach results to rawScan._clientDns for the TechTab to
     display. On failure we silently return empty — TechTab falls back
     to its "no records" empty state.

     v1.3.8 patch: forwards rawScan.discovered_subdomains[] as the
     ?extra_subs= query param so the DNS endpoint can probe site-specific
     subdomains found in the homepage HTML (e.g. shop.<agency>.com,
     clients.<agency>.com) — these aren't always in the static common
     list but are very real on agency / SMB sites. */
  /* Params:
       rawScan — scan result object; mutated in place and returned. Reads
                 rawScan.discovered_subdomains (optional array of labels).
       siteUrl — URL of the scanned site; its hostname (leading "www."
                 removed) is sent as ?host=.
     Returns: Promise resolving to rawScan with _clientDns set to the parsed
              JSON response, or null on any failure (bad URL, empty host,
              non-2xx status, network/parse error). Never rejects; a
              null/undefined rawScan is returned untouched. */
  async function enrichDnsData(rawScan, siteUrl) {
    /* Without this guard a missing rawScan throws inside the catch handler
       below (rawScan._clientDns = null) and the promise rejects. */
    if (!rawScan) return rawScan;
    try {
      const url = new URL(siteUrl);
      const host = url.hostname.replace(/^www\./i, '');  /* hostname without leading www. */
      if (!host) {
        rawScan._clientDns = null;
        return rawScan;
      }
      const headers = { 'Content-Type': 'application/json' };
      /* Attach the bearer token when the host page exposes one. */
      const token = (typeof window !== 'undefined' && window.WPSB?.getToken?.()) || null;
      if (token) headers['Authorization'] = 'Bearer ' + token;

      /* Build extra_subs query value from rawScan.discovered_subdomains.
         Sanity-check each label: lowercase, alphanumeric+hyphen only,
         no leading/trailing hyphen, max 63 chars.
         Cap at 20 to keep URL reasonable. */
      const discovered = Array.isArray(rawScan.discovered_subdomains)
        ? rawScan.discovered_subdomains : [];
      const extras = discovered
        .map(s => String(s || '').trim().toLowerCase())
        .filter(s => /^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$/.test(s))
        .slice(0, 20);
      const extraQs = extras.length > 0
        ? '&extra_subs=' + encodeURIComponent(extras.join(','))
        : '';

      const r = await fetch(`${RAILWAY_URL}/brain/dns?host=${encodeURIComponent(host)}${extraQs}`, {
        method: 'GET',
        headers,
      });
      if (!r.ok) {
        if (typeof console !== 'undefined') console.warn('[WPSB Scanner] DNS lookup returned', r.status);
        rawScan._clientDns = null;
        return rawScan;
      }
      const j = await r.json();
      rawScan._clientDns = j;
      return rawScan;
    } catch (e) {
      /* Best-effort: log and fall back to "no DNS data". */
      if (typeof console !== 'undefined') console.warn('[WPSB Scanner] DNS fetch failed:', e.message);
      rawScan._clientDns = null;
      return rawScan;
    }
  }

  /* ── DATA NORMALIZATION ──────────────────────────────────────── */
  /* Converts Railway /brain/scan response → the shape Scanner.jsx
     expects (based on its hardcoded demo `data` constant).

     Anything missing from Railway gets a safe empty default so
     render code doesn't crash on `.map()` or `.length`. */
  function normalizeScanData(raw) {
    if (!raw) return EMPTY_SCAN_DATA;

    const domain = raw.domain || raw.site_url?.replace(/^https?:\/\//,'').replace(/\/.*$/,'') || 'unknown';
    const scannedAt = raw._scanned_at
      ? formatScanDate(raw._scanned_at)
      : 'Just now';

    /* ── Pages / Sitemap ───────────────────────────
       Railway returns `urls[]` flat array.
       Scanner.jsx expects `sitemap[]` with {d:depth, p:path}. */
    const urls = Array.isArray(raw.urls) ? raw.urls : [];
    const sitemap = buildSitemapTree(urls, domain);
    const pages = raw.url_count ?? raw.pages_scanned ?? urls.length;

    /* ── Files & Docs ──────────────────────────────
       Railway returns `documents` with CATEGORY arrays (pdfs, office,
       archives, images) — NOT a unified items[] array. v1 scanner.html
       renders each category separately; v2 FilesTab expects one flat
       items[] with {id, url, type, size, page, filename, flags}.

       Session B flatten: concat all category arrays into items[] with
       `type` derived from the category name. flags[] empty for now —
       the per-file flag system (no-OCR, orphan, oversize, etc.) is a
       future-forward feature; we populate it from available signals but
       don't fabricate. */
    const documents = raw.documents || {};
    /* Flattens one server `documents` category array into FilesTab items.

       @param arr      category array; entries are URL strings or objects
                       carrying url/href/path (+ optional size, mime, page,
                       source_page). A missing array yields [].
       @param catType  category label; becomes `type` and the id prefix.
       @returns        [{id, url, type, size, sizeLabel, filename, mime,
                         page, flags}] — one item per input entry, in order.

       Malformed entries (null, non-string URL) produce an item with an
       empty `url` instead of throwing, so a single bad entry cannot abort
       the whole normalization; callers that need a URL filter on `url`. */
    const buildCategoryItems = (arr, catType) =>
      (arr || []).map((item, idx) => {
        const isRecord = item !== null && typeof item === 'object';
        const rawUrl = typeof item === 'string'
          ? item
          : (isRecord ? (item.url || item.href || item.path || '') : '');
        const url = typeof rawUrl === 'string' ? rawUrl : '';
        // Drop ?query / #hash from the last path segment so the extension
        // checks below see "report.pdf", not "report.pdf?ver=2".
        const filename = url.split('/').pop().split(/[?#]/)[0] || 'file';
        const size = (isRecord && item.size) ? item.size : null;

        // Build flags array from observable signals. Without per-page
        // orphan analysis or PDF OCR inspection, we populate conservatively:
        //   - 'case' if ext contains uppercase (.PDF, .DOCX)
        //   - 'spaces' if filename has spaces
        //   - 'parens' if filename contains parens
        //   - 'generic' if filename matches known generic patterns
        //   - 'date' if filename embeds a d-m-y style date
        //   - 'diacritics' if filename contains accented letters
        //   - 'revision' if filename carries a v2/draft/final style suffix
        //   - 'oversize' if size > 2MB (2097152 bytes)
        const flags = [];
        // A filename without a dot has no extension; treating the whole
        // name as the extension would raise a bogus 'case' flag.
        const dotAt = filename.lastIndexOf('.');
        const ext = dotAt >= 0 ? filename.slice(dotAt + 1) : '';
        if (/[A-Z]/.test(ext)) flags.push('case');
        if (/\s/.test(filename)) flags.push('spaces');
        if (/[()]/.test(filename)) flags.push('parens');
        if (/^(document|file|untitled|scan|image|copy|new)[\s_-]?\d*\.[a-z]+$/i.test(filename)) flags.push('generic');
        if (/\d{1,2}[-_. ]\d{1,2}[-_. ]\d{2,4}/.test(filename)) flags.push('date');
        if (/[àáâãäåèéêëìíîïòóôõöùúûüýÿñç]/i.test(filename)) flags.push('diacritics');
        if (/[-_ ](?:v\d+|rev\d+|draft|final|copy|old|new)/i.test(filename)) flags.push('revision');
        if (typeof size === 'number' && size > 2097152) flags.push('oversize');
        /* ADA: non-PDF office formats need tagged PDF for accessibility */
        const extLow = ext.toLowerCase();
        if (['doc','docx','ppt','pptx','xls','xlsx','odt','ods','odp'].includes(extLow)) flags.push('non-accessible-format');
        /* ADA: PDFs with scan/image filenames likely lack OCR */
        if (extLow === 'pdf' && /scan|scanned|fax|application[-_\s]?form/i.test(filename)) flags.push('no-ocr');

        return {
          id: `${catType}-${idx}`,
          url,
          type: catType,
          size: size,
          sizeLabel: size ? formatBytes(size) : '—',
          filename,
          mime: (isRecord && item.mime) || null,
          page: (isRecord && (item.page || item.source_page)) || null,
          flags,
        };
      });

    /* Merge server-detected files with client-side extracted files from page walk.
       _clientFiles comes from extractLinksAndFilesFromBody across all fetched pages.
       This catches PDFs linked in page content (Elementor lists, CTA blocks, etc.)
       that the server's sitemap-only crawl misses.

       Each entry is read as {href, ext?, page_url?, text?}. Entries without a
       string href are dropped (they cannot be rendered or de-duplicated);
       surviving items keep their original index in `id`. */
    const clientFileItems = (Array.isArray(raw._clientFiles) ? raw._clientFiles : [])
      .map((f, idx) => {
        if (!f || typeof f.href !== 'string' || !f.href) return null;
        const filename = f.href.split('/').pop().split(/[?#]/)[0] || 'file';
        /* Extension exactly as written in the filename ('' when there is no
           dot). Kept un-lowercased so the 'case' flag can see it. */
        const nameExt = filename.includes('.') ? filename.split('.').pop() : '';
        const ext = String(f.ext || nameExt || 'file').toLowerCase();
        /* Everything that is neither a PDF nor an archive is shown as 'doc'. */
        const type = ext === 'pdf' ? 'pdf'
                   : /^(zip|rar|7z|gz)$/.test(ext) ? 'archive'
                   : 'doc';
        const flags = [];
        /* Test the original-case extension: `ext` is already lower-cased, so
           testing it for uppercase could never match. */
        if (/[A-Z]/.test(nameExt || String(f.ext || ''))) flags.push('case');
        if (/\s/.test(filename)) flags.push('spaces');
        if (/[()]/.test(filename)) flags.push('parens');
        if (/\d{1,2}[-_.\s]\d{1,2}[-_.\s]\d{2,4}/.test(filename)) flags.push('date');
        if (/[-_\s](?:v\d+|rev\d+|draft|final|copy|old|new)\b/i.test(filename)) flags.push('revision');
        if (/^(document|file|untitled|scan|image|copy|new)[\s_-]?\d*\.pdf$/i.test(filename)) flags.push('generic');
        /* ADA heuristics for PDFs: flag if filename suggests scanned/image-only doc */
        if (ext === 'pdf') {
          if (/scan|scanned|fax|form[-_\s]?\d|application[-_\s]?form/i.test(filename)) flags.push('no-ocr');
          if (/image|img|photo|picture|photo[-_\s]?copy|photocopy/i.test(filename)) flags.push('scanned-image');
        }
        /* Non-PDF documents are inherently less accessible for screen readers.
           Same extension list the server-side category items use, so a .docx
           is flagged no matter which crawl discovered it. */
        if (['doc','docx','ppt','pptx','xls','xlsx','odt','ods','odp'].includes(ext)) flags.push('non-accessible-format');
        return {
          id: 'client-' + idx,
          url: f.href,
          type,
          size: null,
          sizeLabel: '—',
          filename,
          mime: null,
          page: f.page_url || null,
          source: 'page-crawl',
          link_text: f.text || null,
          flags,
        };
      })
      .filter(Boolean);

    /* Assemble the Files tab list. Server category items go in first (they
       may carry size/mime data); every URL they contribute is recorded so a
       client-crawl item is only appended when its URL has not been seen.
       Items with an empty url are discarded. */
    const seenFileUrls = new Set();
    const filesItems = [];
    const serverCategories = [
      [documents.pdfs, 'pdf'],
      [documents.office, 'doc'],
      [documents.archives, 'archive'],
      [documents.images, 'image'],
    ];
    for (const [entries, category] of serverCategories) {
      for (const file of buildCategoryItems(entries, category)) {
        if (!file.url) continue;
        seenFileUrls.add(file.url);
        filesItems.push(file);
      }
    }
    /* Client-crawl files whose URL is not already in the list */
    for (const file of clientFileItems) {
      if (!file.url || seenFileUrls.has(file.url)) continue;
      seenFileUrls.add(file.url);
      filesItems.push(file);
    }

    /* ── Images list (separate from Files tab) ─────────────────
       FOUR sources merged, in priority order:
         1) raw._clientImages — homepage <img> parse from enrichBrandData
         2) raw._clientPages[].images — per-page <img> parses from enrichPagesData
         3) documents.images — URL-only fallback from scanner module sitemap crawl
         4) raw.sitemap_images.images — images declared in the site's XML sitemap

       All four merge into a single deduplicated list. Dedup is by src URL:
       the first source to add a URL keeps its fields, and later sources only
       fill in alt/width/height/mime values that are still missing.

       Output shape for ImagesTab: {id, src, filename, ext, type, alt,
       width, height, mime}.

       Why merge (not just pick one): Homepage often has few content images
       (CSS-background heavy). Per-page scans find interior content. Files
       tab documents.images often holds sitemap-discovered images the DOM
       parse missed. Combining all sources gives the most complete inventory. */
    /* src → image object. Created without a prototype because the keys are
       URLs taken from scanned pages: on a plain {} a src such as
       "constructor" would resolve to an inherited member and "__proto__"
       would not be stored as an entry at all. */
    const imageMap = Object.create(null);

    /* Registers one image in imageMap, or merges it into the entry that
       already exists for the same src.

       @param img        {src, alt?, width?, height?, mime?, ...}; ignored
                         when falsy or when src is not a non-empty string.
       @param sourceTag  label appended to the entry's `_sources` list.

       Skipped entirely: data: URIs, sitebuilder navbar/spacer images and
       /cdn-cgi/ URLs. On a repeat src the first entry keeps its fields;
       only alt/width/height/mime that are still empty get filled in. */
    function addImage(img, sourceTag) {
      if (!img || typeof img.src !== 'string' || !img.src) return;
      const src = img.src;
      if (src.startsWith('data:')) return;
      if (/sitebuilder[\/\\]images[\/\\]navbar-/i.test(src)) return;
      if (/sitebuilder[\/\\]images[\/\\]spacer/i.test(src)) return;
      if (/\/cdn-cgi\//.test(src)) return;
      const existing = imageMap[src];
      if (!existing) {
        imageMap[src] = { ...img, _sources: [sourceTag] };
        return;
      }
      /* Merge: keep richer data, accumulate sources */
      if (!existing.alt && img.alt) existing.alt = img.alt;
      if (!existing.width && img.width) existing.width = img.width;
      if (!existing.height && img.height) existing.height = img.height;
      if (!existing.mime && img.mime) existing.mime = img.mime;
      existing._sources.push(sourceTag);
    }

    /* Source 1: homepage extraction */
    (Array.isArray(raw._clientImages) ? raw._clientImages : []).forEach(img => {
      addImage(img, 'home');
    });

    /* Source 2: per-page extractions (from enrichPagesData) */
    (Array.isArray(raw._clientPages) ? raw._clientPages : []).forEach(page => {
      (page.images || []).forEach(img => addImage(img, 'page'));
    });

    /* Helpers shared by the URL-only sources below and the finalize step. */
    /* Last path segment of `src` with the query string removed. */
    const deriveImageFilename = (src, fallback) =>
      src.split('?')[0].split('/').pop() || fallback;
    /* Lower-cased extension of `filename`, or 'jpg' when it has none.
       (A plain split('.').pop() returns the whole name for an extension-less
       file, so the 'jpg' fallback would never apply.) */
    const deriveImageExt = (filename) => {
      const dotAt = filename.lastIndexOf('.');
      const ext = dotAt >= 0 ? filename.slice(dotAt + 1) : '';
      return (ext || 'jpg').toLowerCase();
    };
    /* Coarse image role guessed from keywords anywhere in the URL. */
    const classifyImageBySrc = (src) => {
      const sl = src.toLowerCase();
      if (sl.includes('logo')) return 'logo';
      if (sl.includes('icon')) return 'icon';
      if (sl.includes('hero') || sl.includes('banner')) return 'hero';
      if (sl.includes('thumb')) return 'thumbnail';
      return 'image';
    };

    /* Source 3: server documents.images — URL-only, fill type from path.
       Entries are URL strings or objects carrying src/url/href; anything
       without a string URL is skipped. */
    (Array.isArray(documents.images) ? documents.images : []).forEach((item, idx) => {
      const isRecord = item !== null && typeof item === 'object';
      const src = typeof item === 'string'
        ? item
        : (isRecord ? (item.src || item.url || item.href || '') : '');
      if (!src || typeof src !== 'string') return;
      const filename = deriveImageFilename(src, `image-${idx}`);
      addImage({
        src,
        filename,
        ext: deriveImageExt(filename),
        type: classifyImageBySrc(src),
        alt:    (isRecord && item.alt)    || '',
        width:  (isRecord && item.width)  || null,
        height: (isRecord && item.height) || null,
        mime:   (isRecord && item.mime)   || null,
      }, 'server');
    });

    /* Source 4 (v1.3.8 R7p4 patch g): images declared in the site's
       XML sitemap. The CMS knows about every image it published; the
       sitemap is its inventory. This source typically dwarfs the
       homepage walker on content-heavy sites — a portfolio site with
       100 case studies might emit 800+ images here while the homepage
       walker only finds 30. Server returns {sitemap_images: {images,
       sitemaps_walked, page_count, truncated}}. */
    const sitemapImg = raw.sitemap_images && Array.isArray(raw.sitemap_images.images)
      ? raw.sitemap_images.images : [];
    sitemapImg.forEach(img => {
      if (!img || !img.src || typeof img.src !== 'string') return;
      const filename = deriveImageFilename(img.src, 'image');
      addImage({
        src: img.src,
        filename,
        ext: deriveImageExt(filename),
        type: classifyImageBySrc(img.src),
        alt: img.title || img.caption || '',
        page_url: img.page_url || null,
        _from_sitemap: true,
      }, 'sitemap');
    });

    /* Finalize: assign stable ids, strip internal _sources marker.
       filename/ext are derived from src only when the source did not
       supply them. */
    const imageList = Object.values(imageMap).map((img, idx) => ({
      id: `img-${idx}`,
      src: img.src,
      filename: img.filename || deriveImageFilename(img.src, `image-${idx}`),
      ext: img.ext || deriveImageExt(deriveImageFilename(img.src, '')),
      type: img.type || 'image',
      alt: img.alt || '',
      width: img.width || null,
      height: img.height || null,
      mime: img.mime || null,
    }));

    /* ── Tech details ──────────────────────────────
       Railway v0.9.4 adds website_builder, detected_platforms, server_software
       via enrichment. Prefer enriched fields over base scanner output. */
    const tech = {
      platform: raw.website_builder || raw.platform || 'Unknown',
      cms: raw.cms || null,
      confidence: raw.platform_confidence || (raw.detected_platforms?.[0]?.confidence) || 'low',
      signals: raw.platform_signals || [],
      version: raw.platform_version || raw.wp_version || null,
      majorVersion: raw.platform_major_version || null,
      /* plugins: pass through whatever shape server sends. Server today
         sends string[] (just names from v1 site_counts.wp_plugins). After
         server Phase 2 it will send object[] {name, slug, version,
         category, vendor_url, signals[]}. The Plugins & Forms tab handles
         both shapes via normalizePluginEntry below. */
      plugins: raw.detected_plugins || raw.extensions?.plugins || [],
      themes: raw.extensions?.themes || [],
      hasSSL: raw.has_ssl ?? true,
      httpStatus: raw.http_status || 200,
      hosting: raw.hosting || null,
      cdn: raw.cdn || null,
      serverSoftware: raw.server_software || null,
      poweredBy: raw.powered_by || null,
      detectedPlatforms: raw.detected_platforms || [],  // full list from signature detection
      websiteBuilder: raw.website_builder || null,       // top match

      /* ── Round 7 part 4 (v1.3.8) — Hosting + tracking + civic + estimator ──
         Server emits these as separate top-level fields. Tabs that need
         them pull from `data.tech.hostingProvider` etc. Falls through to
         null/empty when the server hasn't run R7p4 (e.g. cached scan
         from earlier version), keeping the UI safe. */
      hostingProvider: raw.hosting_provider || null,
      /* Shape: { primary: {id,name,category,signals_matched,sources,confidence,notes},
                   all_matches: [...same shape] } */
      trackingPixels: raw.tracking_pixels || { pixels: [], count: 0 },
      /* Shape: { pixels: [{provider, id, source}], count } */
      civicClassification: raw.civic_classification || null,
      /* Shape: { is_civic, confidence (0-100), confidence_label, signals[],
                   industry_label } — industry_label is ALWAYS "Civic / Municipal",
                   never "CivicPlus" per locked rule. */
      migrationEstimate: raw.migration_estimate || null,
      /* Shape: { preset_key, tier, hours_low, hours_high, base_hours_low,
                   base_hours_high, scope ('small'|'large'), description,
                   modifiers_applied: [{name, impact, detail}], confidence,
                   notes } */
    };

    /* ── Forms ──────────────────────────────────────
       Per-form detail extracted from scanned page HTML. Phase 2 server
       work will populate this; for now passes through whatever the server
       sends (likely empty until that work lands).
       Shape per item: {
         id?:        string,
         page_url:   string,        // where the form lives
         action:     string|null,   // form action attr
         method:     'get'|'post',
         platform?:  string,        // 'gravity'|'cf7'|'wpforms'|'ninja'|'native'|'unknown'
         fields:     [{
           name: string, type: string, label: string|null,
           required: boolean, placeholder?: string,
           options?: string[],   // for select/radio/checkbox
         }]
       }
       The Plugins & Forms tab renders empty-state when forms.length===0.

       v1.3.8 patch h2: merges server-emitted forms (homepage only) with
       client-side per-page forms (from enrichPagesData). Dedupes by
       page_url + form id since a single form is sometimes rendered on
       multiple pages (footer newsletter, sticky CTA), and the same form
       can appear in raw.forms (from server) and rawScan._clientForms
       (from per-page walk) when the homepage scan hit on it. */
    const forms = (() => {
      /* Server-emitted forms come first so they win over the client copy
         of the same form. */
      const fromServer = Array.isArray(raw.forms) ? raw.forms : [];
      const fromClient = Array.isArray(raw._clientForms) ? raw._clientForms : [];
      const dedupKeys = new Set();
      return [...fromServer, ...fromClient].filter((form) => {
        if (!form) return false;
        /* Identity = page_url + form id + platform */
        const dedupKey = (form.page_url || '') + '|' + (form.id || '') + '|' + (form.platform || '');
        if (dedupKeys.has(dedupKey)) return false;
        dedupKeys.add(dedupKey);
        return true;
      });
    })();

    /* ── Integrations ───────────────────────────────
       Third-party services inferred from several raw sources:
         1. raw.integrations      — explicit list, if the server sends one
         2. raw.tracking_pixels   — analytics/marketing pixels (GTM, GA4, Meta, etc.)
         3. raw.hosting_provider  — CDN (Cloudflare, Fastly, etc.)
         4. plugin slugs / names  — reCAPTCHA, cookie banners, ADA widgets
         5. raw._raw_body         — script-embed fingerprints (JotForm, live chat)
         6. per-page link lists   — external portals, payment, scheduling, etc.
       Shape: {name, category, signal, vendor_url?, evidence?: string[]}
       Entries are de-duplicated by `name`; the first source to add a name wins. */
    const integrations = (() => {
      const isRecord = (v) => v !== null && typeof v === 'object';

      /* Seed with the server's explicit list (copied, never mutated).
         Non-object entries are dropped so the name index and the final
         sort cannot throw on them. */
      const out = Array.isArray(raw.integrations) ? raw.integrations.filter(isRecord) : [];
      const seen = new Set(out.map((i) => i.name));
      function add(name, category, signal, vendor_url, evidence) {
        if (seen.has(name)) return;
        seen.add(name);
        out.push({ name, category, signal, vendor_url: vendor_url || null, evidence: evidence || [] });
      }

      /* 1. Tracking pixels from extractTrackingPixels() */
      const pixelMap = {
        google_tag_manager:    { name:'Google Tag Manager', category:'analytics', vendor_url:'https://tagmanager.google.com' },
        google_analytics:      { name:'Google Analytics 4 (GA4)', category:'analytics', vendor_url:'https://analytics.google.com' },
        google_analytics_legacy:{ name:'Google Analytics (UA)', category:'analytics', vendor_url:'https://analytics.google.com' },
        meta_pixel:            { name:'Meta (Facebook) Pixel', category:'analytics', vendor_url:'https://business.facebook.com' },
        tiktok_pixel:          { name:'TikTok Pixel', category:'analytics', vendor_url:'https://ads.tiktok.com' },
        linkedin_insight:      { name:'LinkedIn Insight Tag', category:'analytics', vendor_url:'https://business.linkedin.com' },
        pinterest_tag:         { name:'Pinterest Tag', category:'analytics', vendor_url:'https://ads.pinterest.com' },
        hotjar:                { name:'Hotjar', category:'analytics', vendor_url:'https://www.hotjar.com' },
        hubspot:               { name:'HubSpot', category:'crm', vendor_url:'https://www.hubspot.com' },
      };
      const pixels = Array.isArray(raw.tracking_pixels?.pixels) ? raw.tracking_pixels.pixels : [];
      pixels.forEach((p) => {
        /* Own-property check: a provider string such as "constructor" must
           not resolve to an inherited Object.prototype member. */
        if (!isRecord(p) || !Object.prototype.hasOwnProperty.call(pixelMap, p.provider)) return;
        const def = pixelMap[p.provider];
        add(def.name, def.category, p.source, def.vendor_url, p.id ? ['ID: ' + p.id] : []);
      });

      /* 2. CDN from hosting provider */
      const hosting = raw.hosting_provider?.primary;
      if (hosting && hosting.category === 'CDN') {
        const sourceList = Array.isArray(hosting.sources) ? hosting.sources.join(', ') : '';
        add(hosting.name, 'cdn', sourceList || 'headers', null, []);
      }
      /* Also check cloudflare_present flag from page scan */
      if (raw.cloudflare_present && !seen.has('Cloudflare')) {
        add('Cloudflare', 'cdn', 'response headers', 'https://cloudflare.com', []);
      }

      /* 3. reCAPTCHA — detected from body signals or plugins.
         Plugin entries may be plain strings (names) or objects with
         slug/name, so both shapes are reduced to a lower-cased token.
         raw.plugins is preferred when it is an array; otherwise fall back
         to the fields the tech section reads its plugin list from.
         NOTE(review): confirm which of these fields the server populates. */
      const pluginEntries =
        [raw.plugins, raw.detected_plugins, raw.extensions?.plugins].find(Array.isArray) || [];
      const allPluginSlugs = pluginEntries.map((p) => {
        if (typeof p === 'string') return p.toLowerCase();
        if (!isRecord(p)) return '';
        return String(p.slug || p.name || '').toLowerCase();
      });
      const anyPluginIncludes = (needle) => allPluginSlugs.some((s) => s.includes(needle));
      const bodyText = typeof raw._raw_body === 'string' ? raw._raw_body : '';
      if (anyPluginIncludes('recaptcha') ||
          /recaptcha\.google\.com|g-recaptcha|data-sitekey/i.test(bodyText)) {
        add('Google reCAPTCHA', 'auth', 'script src / plugin', 'https://www.google.com/recaptcha', []);
      }

      /* 4. Cookie consent banners */
      const cookiePlugins = ['cookiebot','complianz','cookie-notice','gdpr-cookie','cookie-law-info','uk-cookie-consent'];
      if (cookiePlugins.some(anyPluginIncludes)) {
        add('Cookie Consent / GDPR Banner', 'legal', 'plugin detected', null, []);
      }

      /* 5. ADA / Accessibility widgets */
      const adaWidgets = [
        { patterns:['accessibe','acsbapp','acsbjs'],        name:'AccessiBe',  vendor:'https://accessibe.com' },
        { patterns:['userway'],                             name:'UserWay',    vendor:'https://userway.org' },
        { patterns:['audioeye'],                            name:'AudioEye',   vendor:'https://www.audioeye.com' },
        { patterns:['equalweb'],                            name:'EqualWeb',   vendor:'https://equalweb.com' },
        { patterns:['recite-me','reciteme'],                name:'Recite Me',  vendor:'https://reciteme.com' },
        { patterns:['monsido'],                             name:'Monsido',    vendor:'https://monsido.com' },
      ];
      adaWidgets.forEach((w) => {
        if (w.patterns.some(anyPluginIncludes)) {
          add(w.name + ' (ADA Widget)', 'accessibility', 'plugin detected', w.vendor, []);
        }
      });

      /* 6. JotForm (external, not WP plugin) */
      if (/jotform\.com|jotform-embed/i.test(bodyText)) {
        add('JotForm', 'forms', 'script embed', 'https://www.jotform.com', []);
      }

      /* 7. Live chat / support widgets */
      const chatPatterns = [
        { re:/intercom/i,          name:'Intercom',   vendor:'https://intercom.com' },
        { re:/tawk\.to|tawk_api/i, name:'Tawk.to',    vendor:'https://tawk.to' },
        { re:/tidio/i,             name:'Tidio',      vendor:'https://tidio.com' },
        { re:/drift\.com/i,        name:'Drift',      vendor:'https://drift.com' },
        { re:/zopim|zendesk/i,     name:'Zendesk Chat', vendor:'https://zendesk.com' },
        { re:/crisp\.chat/i,       name:'Crisp',      vendor:'https://crisp.chat' },
        { re:/freshdesk|freshchat/i,name:'Freshchat', vendor:'https://freshworks.com' },
      ];
      chatPatterns.forEach((c) => {
        if (c.re.test(bodyText)) add(c.name, 'chat', 'script embed', c.vendor, []);
      });

      /* 8. External link domains from pages data — portals, patient systems,
            billing platforms, booking widgets, 3rd-party tools linked from site.
            Any external domain linked from 2+ pages OR with specific signals
            (login.jsp, portal, pay, billing, schedule, patient) gets surfaced. */
      const pagesData = Array.isArray(raw._clientPages) ? raw._clientPages :
                        Array.isArray(raw.pages) ? raw.pages : [];
      const extDomains = new Map(); // domain → {urls: Set, pages: Set, sample_url}
      /* NOTE(review): these are substring matches against the sample URL, so
         short tokens also hit unrelated hosts (e.g. "book" in facebook.com). */
      const externalSignalPatterns = [
        /login|portal|patient|pay|billing|schedule|book|appointment|member|account|signin|sign-in|auth|secure|mychart|healow|ecwcloud|athena|epic|cerner|allscripts/i
      ];
      /* The scanned site's own host — identical for every link, so it is
         computed once rather than inside the per-link loop. */
      const siteDomain = String(raw.site_url || raw.domain || '')
        .replace(/^https?:\/\//, '').replace(/^www\./, '').split('/')[0].toLowerCase();
      pagesData.forEach((pg) => {
        if (!isRecord(pg)) return;
        const pageUrl = pg.url || '';
        const links = Array.isArray(pg.external_links) ? pg.external_links :
                      Array.isArray(pg.links) ? pg.links : [];
        links.forEach((link) => {
          const href = typeof link === 'string' ? link
                     : isRecord(link) ? (link.href || link.url || '') : '';
          if (typeof href !== 'string' || !href.startsWith('http')) return;
          let domain;
          try {
            domain = new URL(href).hostname.toLowerCase().replace(/^www\./, '');
          } catch (e) {
            /* Deliberate best-effort: an unparseable href is skipped. */
            return;
          }
          /* Same site or one of its sub-domains → not an external service */
          if (!domain || domain === siteDomain || domain.endsWith('.' + siteDomain)) return;
          let entry = extDomains.get(domain);
          if (!entry) {
            entry = { urls: new Set(), pages: new Set(), sample_url: href };
            extDomains.set(domain, entry);
          }
          entry.urls.add(href);
          if (pageUrl) entry.pages.add(pageUrl);
        });
      });
      /* Known 3rd-party service fingerprints */
      const knownServices = {
        'ecwcloud.com':     { name:'eClinicalWorks Patient Portal', category:'healthcare' },
        'healow.com':       { name:'Healow Patient Portal',         category:'healthcare' },
        'mychart':          { name:'Epic MyChart',                  category:'healthcare' },
        'athenahealth.com': { name:'Athena Health',                 category:'healthcare' },
        'cerner.com':       { name:'Cerner Health',                 category:'healthcare' },
        'patientpoint.com': { name:'PatientPoint',                  category:'healthcare' },
        'zocdoc.com':       { name:'ZocDoc Scheduling',             category:'scheduling' },
        'opentable.com':    { name:'OpenTable',                     category:'scheduling' },
        'acuityscheduling': { name:'Acuity Scheduling',             category:'scheduling' },
        'calendly.com':     { name:'Calendly',                      category:'scheduling' },
        'squareup.com':     { name:'Square Payments',               category:'payment' },
        'paypal.com':       { name:'PayPal',                        category:'payment' },
        'stripe.com':       { name:'Stripe',                        category:'payment' },
        'venmo.com':        { name:'Venmo',                         category:'payment' },
        'mindbodyonline':   { name:'Mindbody',                      category:'scheduling' },
        'typeform.com':     { name:'Typeform',                      category:'forms' },
        'jotform.com':      { name:'JotForm',                       category:'forms' },
        'formstack.com':    { name:'Formstack',                     category:'forms' },
        'podium.com':       { name:'Podium',                        category:'reviews' },
        'birdeye.com':      { name:'BirdEye',                       category:'reviews' },
        'google.com/maps':  { name:'Google Maps',                   category:'maps' },
        'youtube.com':      { name:'YouTube Embed',                 category:'media' },
        'vimeo.com':        { name:'Vimeo',                         category:'media' },
        'mailchimp.com':    { name:'Mailchimp',                     category:'mail' },
        'constantcontact':  { name:'Constant Contact',              category:'mail' },
        'klaviyo.com':      { name:'Klaviyo',                       category:'mail' },
      };
      const knownServiceKeys = Object.keys(knownServices);
      extDomains.forEach((info, domain) => {
        const pageCount = info.pages.size;
        const urlCount = info.urls.size;
        /* Include if: linked from 2+ pages, OR matches signal patterns, OR is a known service */
        const hasSignal = externalSignalPatterns.some((re) => re.test(info.sample_url));
        const knownKey = knownServiceKeys.find((k) => domain.includes(k) || info.sample_url.includes(k));
        if (pageCount < 2 && !hasSignal && !knownKey) return;
        const svcName = knownKey ? knownServices[knownKey].name : null;
        const svcCat  = knownKey ? knownServices[knownKey].category : 'external-link';
        const displayName = svcName || ('External: ' + domain);
        const evidence = [];
        if (pageCount > 0) evidence.push('Found on ' + pageCount + ' page' + (pageCount > 1 ? 's' : ''));
        if (urlCount > 1) evidence.push(urlCount + ' links');
        evidence.push(info.sample_url.substring(0, 80));
        add(displayName, svcCat, 'external link on page', info.sample_url, evidence);
      });

      /* Sort: tracking pixels + CDN + known services first (they have category != external-link),
         then header/nav/footer external links, then body links */
      const sectionPriority = { header:0, nav:1, footer:2, body:3 };
      const isPrimary = (x) => x.category !== 'external-link';
      /* Section tag is read from the third evidence string ("section:<name>");
         anything without one is treated as a body link. */
      const sectionOf = (x) => {
        const tag = x.evidence ? x.evidence[2] : null;
        const found = typeof tag === 'string' ? tag.match(/section:(\w+)/) : null;
        return (found && found[1]) || 'body';
      };
      /* `??` rather than `||`: header has priority 0, which `||` would
         replace with 3 and sort alongside body links. */
      const rankOf = (x) => sectionPriority[sectionOf(x)] ?? 3;
      out.sort((a, b) => {
        if (isPrimary(a) && !isPrimary(b)) return -1;
        if (!isPrimary(a) && isPrimary(b)) return 1;
        return rankOf(a) - rankOf(b);
      });
      return out;
    })();

    /* ── Per-page SEO (v0.9.4 Session 8 enrichment) ─
       Array of {url, title, meta_description, h1, seo_issues, ...} per scanned page. */
    const perPageSeo = Array.isArray(raw.pages) ? raw.pages : [];

    /* ── Migration / Recs ──────────────────────────
       Railway returns migration_notes[] (simple strings) and
       migration_notes_detailed[] with severity tags. */
    const migrationNotes = Array.isArray(raw.migration_notes_detailed) && raw.migration_notes_detailed.length
      ? raw.migration_notes_detailed
      : (raw.migration_notes || []).map(msg => ({ message: msg, severity: 'info', area: 'general' }));
    const warnings = raw.warnings || [];

    /* ── Counts for tab labels ──────────────────── */
    /* Images count: prefer actual extracted imageList length (rich data)
       over scanner module's raw total. Fallback to server total when
       we got 0 client-side extractions but server reports images exist. */
    const imagesCount = imageList.length
      || raw.documents?.totals?.images
      || raw.site_counts?.images_from_railway
      || 0;
    const docsCount   = raw.documents?.totals?.grandTotal || filesItems.length || 0;

    /* ── Compose final shape ──────────────────────
       Keep Scanner.jsx's existing field names so rendering
       code works unchanged. Fill missing stuff with empty defaults. */
    return {
      /* Top-level identity */
      site: domain,
      siteUrl: raw.site_url || ('https://' + domain),
      scannedAt,
      scannedAtISO: raw._scanned_at || new Date().toISOString(),
      scanId: raw.scan_id || null,
      scanVersion: raw.scan_version || null,

      /* Counts */
      pages,
      images: imagesCount,
      icons: Array.isArray(raw.icon_libraries) ? raw.icon_libraries.length : 0,  /* Real count from CSS extraction */
      maps: Array.isArray(raw._clientMaps) ? raw._clientMaps.length : 0,
      mapsList: Array.isArray(raw._clientMaps) ? raw._clientMaps : [],

      /* Image list (Session images-tab): array of image objects for ImagesTab.
         data.images stays as a count for backward-compat with tab labels. */
      imageList,

      /* v1.3.8 R7p4 patch g — sitemap image metadata. Frontend can show
         "856 images declared in sitemap.xml across 124 pages" notice in
         the Images tab. truncated=true means there were more than 5000
         images and the server stopped collecting (we still walk subset). */
      sitemapImages: raw.sitemap_images
        ? {
            found: !!raw.sitemap_images.found,
            count: Array.isArray(raw.sitemap_images.images) ? raw.sitemap_images.images.length : 0,
            page_count: raw.sitemap_images.page_count || 0,
            sitemaps_walked: Array.isArray(raw.sitemap_images.sitemaps_walked) ? raw.sitemap_images.sitemaps_walked.length : 0,
            truncated: !!raw.sitemap_images.truncated,
          }
        : { found: false, count: 0, page_count: 0, sitemaps_walked: 0, truncated: false },

      /* Per-page content (Session pages-tab — v1 parity).
         Array of fully-hydrated page objects: {url, label, menu_name, title,
         description, meta, headings, content, images, content_length, ...}.
         Populated by enrichPagesData() post-scan via per-URL /brain/scan/test
         fetches. Empty array if enrichment skipped or no URLs discovered.
         pageFetchLimited=true when more URLs exist than PAGE_FETCH_CAP. */
      pagesList: Array.isArray(raw._clientPages) ? raw._clientPages : [],
      pageFetchLimited: !!raw._pageFetchLimited,
      pageFetchTotal: raw._pageFetchTotal || (Array.isArray(raw.urls) ? raw.urls.length : 0),

      /* Real data (session 7) */
      sitemap,
      files: { items: filesItems, totals: raw.documents?.totals || {} },
      tech,
      forms,                          /* Phase 2 — per-form structure for Plugins & Forms tab */
      integrations,                   /* Phase 2 — third-party services */
      perPageSeo,                     /* v0.9.4 — per-URL SEO from enrichment */
      /* v1.3.0 — Scan Confidence + pre-migration checklist.
         Shape: { score, band, signals[], advisories[], pre_migration_checklist[] }
         null if the server didn't include it (older Railway version). */
      scanConfidence: raw.scan_confidence || null,
      /* v1.3.3 — was_blocked signal. True when scan was effectively blocked
         (bot challenge, robots/sitemap 403, SPA shell). When true, scan
         did NOT count toward the user's monthly usage quota — frontend
         shows a "this scan didn't count toward your usage" notice next
         to the low-confidence advisory. */
      wasBlocked: !!raw.was_blocked,
      migrationNotes,
      warnings,
      migrationComplexity: raw.migration_complexity || 'Unknown',
      estimate: raw.estimate || null,
      phases: raw.phases || [],
      allPlatformScores: raw.all_platform_scores || [],

      /* AI summary (Brain 4 only) */
      brain4Active: !!raw.brain4_active,
      aiSummary: raw.ai_summary || null,

      /* Plan info */
      plan: raw.plan || null,
      pageLimit: raw.page_limit || null,
      pageLimitHit: !!raw.page_limit_hit,
      usedDeepScanCredit: !!raw.used_deep_scan_credit,

      /* Message + disclaimer */
      message: raw.message || null,
      disclaimer: raw._disclaimer || null,

      /* v1.3.8 patch h5: server build tag from /brain/scan response.
         Surfaces in the SA Command and Brand Kit footer so a stale
         server (no logo/favicon extraction) is immediately visible
         instead of being mistaken for a frontend bug. */
      serverBuild: raw._server_build || null,
      serverVersion: raw._server_version || null,

      /* ── Brand Kit (Session 9/A — real data) ─────────────────
         colors/fonts/logo populated by enrichBrandData() before normalize.
         Safe defaults if enrichment failed or hasn't run yet.

         Logo priority (v1.3.7):
           1. raw.logo_url — scraped from <img class="custom-logo">
              or similar in extractLogoFromBody. This is the logo
              actually displayed in the site header — what the brand
              looks like in practice.
           2. raw.contact?.logo_url — extracted from JSON-LD schema.
              Author-declared canonical logo. Always set when site
              emits Organization or LocalBusiness markup with a logo
              property. Falls through when scraping found nothing
              (rare on modern WP themes; common on minimal/custom sites).
           3. null — Brand Kit shows the fallback initial char. */
      /* v1.3.8 patch h2: defensive unescape applied at normalize too.
         Catches: (a) old sessionStorage entries cached before patch h
         deployed (still containing JSON-escaped `\/`), (b) raw values
         that came through a code path bypassing absolute(). Cheap +
         safe — `\/` and `\\` aren't legitimate in URLs. */
      logo: domain.charAt(0).toUpperCase(),  /* Fallback initial char */
      logoUrl: (function() {
        const u = raw.logo_url || raw.contact?.logo_url || null;
        return u ? String(u).replace(/\\\//g, '/').replace(/\\\\/g, '\\') : null;
      })(),
      ogImage: (function() {
        const u = raw.og_image || null;
        return u ? String(u).replace(/\\\//g, '/').replace(/\\\\/g, '\\') : null;
      })(),
      faviconUrl: (function() {
        const u = raw.favicon_url || null;
        if (u) return String(u).replace(/\\\//g, '/').replace(/\\\\/g, '\\');
        /* v1.3.8 patch h2: ultimate fallback to /favicon.ico — virtually
           every site (WP, Shopify, custom) serves one even without the
           <link rel="icon"> tag. The browser would fall back to this
           anyway; might as well show it in the Brand Kit too. The
           BrandKitTab onError handler hides the tile if THIS 404s. */
        try {
          const u2 = raw.site_url || ('https://' + domain);
          return new URL('/favicon.ico', u2).toString();
        } catch (e) {
          return null;
        }
      })(),

      /* Session A: dynamic primary email/phone labels from contact extraction.
         Computed here so BrandKitTab doesn't have to reach into contact.emails[0].label etc. */
      primaryEmailLabel: (function() {
        const first = raw.contact?.emails?.[0];
        if (!first) return 'Primary Email';
        if (typeof first === 'object' && first.label && first.label !== 'Email') return first.label;
        return 'Primary Email';
      })(),
      primaryEmail: (function() {
        const first = raw.contact?.emails?.[0];
        if (!first) return null;
        return typeof first === 'object' ? first.email : first;
      })(),
      primaryPhoneLabel: (function() {
        const first = raw.contact?.phones?.[0];
        if (!first) return 'Primary Phone';
        if (typeof first === 'object' && (first.label || first.type) && first.label !== 'Phone') {
          return first.label || first.type;
        }
        return 'Primary Phone';
      })(),
      primaryPhone: (function() {
        /* Skip placeholder numbers when picking the primary. The same
           logic exists in Scanner.jsx's Contact tab as backstop, but
           filtering here ensures Brand Kit's "Primary Phone" shows a
           real number, not a (999)-999-9999 form placeholder. */
        function isPlaceholder(numStr) {
          if (!numStr) return true;
          const digits = String(numStr).replace(/\D/g, '');
          if (digits.length < 7) return true;
          if (/^(\d)\1+$/.test(digits)) return true;
          if (/^0?12345678/.test(digits) || /^1234567890$/.test(digits)) return true;
          const norm = digits.length === 11 && digits.startsWith('1') ? digits.slice(1) : digits;
          if (norm.length === 10 && norm.slice(3, 6) === '555') return true;
          return false;
        }
        const phones = raw.contact?.phones || [];
        for (const p of phones) {
          const num = typeof p === 'object' ? p.number : p;
          if (!isPlaceholder(num)) return num;
        }
        return null;
      })(),
      colors: Array.isArray(raw.colors) ? raw.colors.map(function(c) {
        // Accept either array of hex strings (from v5.58 extractColors) or
        // pre-shaped {hex, name, role} objects. Normalize to latter.
        if (typeof c === 'string') return { hex: c, name: c.toUpperCase(), role: 'detected' };
        return { hex: c.hex || c, name: c.name || (c.hex || c).toUpperCase(), role: c.role || 'detected' };
      }) : [],
      fonts: Array.isArray(raw.fonts) ? raw.fonts.map(function(f) {
        if (typeof f === 'string') return { name: f, src: 'CSS', weights: [], uses: 0 };
        return {
          name: f.name || 'Unknown',
          src: f.source || f.src || 'CSS',
          weights: Array.isArray(f.weights) ? f.weights : [],
          uses: f.uses || 0,
          isSystem: !!f.isSystem,
        };
      }) : [],
      /* iconLibraries — Font Awesome / Material / Lucide / etc. detected
         in CSS as @font-face but identified as icon libraries (not brand
         fonts). Surfaces in Brand Kit → Icon Libraries section instead of
         Typography. Populated by extractFontsFromCss() splitter. */
      iconLibraries: Array.isArray(raw.icon_libraries) ? raw.icon_libraries.map(function(f) {
        return {
          name: f.name || 'Unknown',
          source: f.source || '@font-face',
          version: f.version || null,
          usage: f.uses || 0,
        };
      }) : [],

      /* Session 8 — REAL contact data from Railway /brain/scan.
         Falls back to empty-but-valid shape if Railway didn't return it
         (older API version, or contact scrape failed server-side).

         v1.3.8 patch: defensive filter on socials. Older server versions
         (pre 20260426g) could leave plain URL strings or {url}-only items
         in the array from JSON-LD merge, which rendered as "???" rows in
         the social table. Drop anything without a recognizable platform
         name so the UI only ever gets clean structured entries. */
      contact: (() => {
        const rawSocials = Array.isArray(raw.contact?.socials) ? raw.contact.socials : [];
        const cleanSocials = rawSocials.filter(function(s) {
          if (!s || typeof s !== 'object') return false;
          const n = s.name || s.n;
          return typeof n === 'string' && n.length > 0;
        });
        return {
        emails:  raw.contact?.emails  || [],
        phones:  raw.contact?.phones  || [],
        socials: cleanSocials,
        /* Also expose legacy `social` key for any code still reading it */
        social:  cleanSocials,
        address: raw.contact?.address || null,
        /* Session B: additional locations (branch offices, mailing addresses).
           Server v0.9.8 populates this from "City, State ZIP" standalone
           matches. Always an array; may be empty if only one address found. */
        city_state_zip: Array.isArray(raw.contact?.city_state_zip)
          ? raw.contact.city_state_zip
          : [],
        /* Round 7 part 3 (v1.3.7): JSON-LD enrichment fields. When a site
           emits Organization or LocalBusiness schema, we get authoritative
           contact + business data — these flow through to Brand Kit and
           Contact tab without any UI changes for the basic case. The
           additional fields (hours, business_type, price_range, etc.)
           power richer Brand Kit displays. */
        address_components: raw.contact?.address_components || null,
        logo_url:           raw.contact?.logo_url || null,
        image_url:          raw.contact?.image_url || null,
        hours:              Array.isArray(raw.contact?.hours) ? raw.contact.hours : [],
        business_type:      raw.contact?.business_type || null,
        business_name:      raw.contact?.business_name || null,
        business_description: raw.contact?.business_description || null,
        price_range:        raw.contact?.price_range || null,
        currencies_accepted: raw.contact?.currencies_accepted || null,
        payment_accepted:   raw.contact?.payment_accepted || null,
        /* Provenance — was contact data sourced from JSON-LD or regex? */
        source:             raw.contact?._source || 'regex',
        /* Diagnostic (SA Command can show these) — what regex
           originally extracted before JSON-LD overrode it */
        alt_address:        raw.contact?.alt_address || null,
        alt_phones:         Array.isArray(raw.contact?.alt_phones) ? raw.contact.alt_phones : [],
        alt_emails:         Array.isArray(raw.contact?.alt_emails) ? raw.contact.alt_emails : [],
        /* Prefer server-side buildLocations() result (raw.contact.locations, v1.4.2+).
           Falls back to client-side extractLocationsFromPage() result (_clientLocations)
           which only fires on /contact pages with 2+ city_state_zip entries. */
        locations: (() => {
          const srv = raw.contact?.locations;
          if (Array.isArray(srv) && srv.length >= 2) return srv;
          const cli = raw._clientLocations;
          if (Array.isArray(cli) && cli.length >= 2) return cli;
          return [];
        })(),
      };
      })(),

      /* dns[] — flattened DNS records for TechTab display.
         Source: raw._clientDns populated by enrichDnsData().
         Shape: {type, host, val, ttl?} per Scanner.jsx TechTab.
         Order: A → AAAA → CNAME → MX → NS → TXT → SPF → DMARC
         (most-common-first so A record appears at top of the table). */
      dns: (() => {
        const d = raw._clientDns;
        if (!d || typeof d !== 'object') return [];
        const rows = [];
        const apex = d.host || '';
        (d.ipv4 || []).forEach(ip => rows.push({ type: 'A',     host: apex,       val: ip }));
        (d.ipv6 || []).forEach(ip => rows.push({ type: 'AAAA',  host: apex,       val: ip }));
        /* v1.3.8 patch: subdomain entries can resolve to either CNAME or
           A records. Display the record_type the resolver actually got
           rather than always labelling them CNAME. */
        (d.cname || []).forEach(c => rows.push({
          type: c.record_type || 'CNAME',
          host: c.name + '.' + apex,
          val: c.target,
        }));
        (d.mx || []).forEach(m    => rows.push({ type: 'MX',    host: apex,       val: `${m.priority} ${m.host}` }));
        (d.ns || []).forEach(n    => rows.push({ type: 'NS',    host: apex,       val: n }));
        (d.txt || []).forEach(t   => {
          /* Split out SPF/DMARC into their own type tags for clarity */
          if (t && t.startsWith('v=spf1')) rows.push({ type: 'SPF', host: apex, val: t });
          else rows.push({ type: 'TXT', host: apex, val: t });
        });
        if (d.dmarc) rows.push({ type: 'DMARC', host: `_dmarc.${apex}`, val: d.dmarc });
        return rows;
      })(),
      seoPages: [],
      scores: [],
      ada: {
        taxonomy: 'Unknown',
        deadline: null,
        standard: (window.WPSB && window.WPSB.CONSTANTS && window.WPSB.CONSTANTS.WCAG.DISPLAY) || 'WCAG 2.1 AA',
        legalFramework: 'Unknown',
        contactEmail: null,
        score: null,
        pdfsFound: 0,
        totalViolations: 0,
        totals: { pagesScanned: 0, totalViolations: 0, critical: 0, serious: 0, moderate: 0, minor: 0 },
        byPage: [],
        byRule: [],
        monitoring: { enabled: false, cadence: 'monthly', alertEmail: '' },
      },

      /* Raw response preserved for advanced access */
      _raw: raw,
    };
  }

  /* ── HELPERS ─────────────────────────────────────────────────── */
  /* Convert a flat URL list into depth-annotated sitemap rows.

     @param {Array<string|{url?: string, href?: string}>} urls
            Absolute URLs, given either as strings or as objects carrying
            the URL in `url` or `href`. Entries that `new URL()` cannot
            parse (relative paths, empty strings, null) are dropped.
     @param {string} [domain] Not used; kept so existing callers keep working.
     @returns {Array<{d: number, p: string}>} One row per unique pathname,
            sorted lexicographically. `d` is the count of non-empty path
            segments (0 for "/"); `p` is the pathname itself. */
  function buildSitemapTree(urls, domain) {
    if (!Array.isArray(urls) || urls.length === 0) return [];

    /* A Set dedupes in O(n). The previous indexOf-based filter was O(n²),
       which adds up on sitemaps with thousands of URLs. */
    const uniquePaths = new Set();
    for (const entry of urls) {
      try {
        const href = typeof entry === 'string' ? entry : (entry.url || entry.href || '');
        uniquePaths.add(new URL(href).pathname || '/');
      } catch {
        /* Unparseable entry (or null) — skip it. */
      }
    }

    return [...uniquePaths].sort().map(p => ({
      d: p === '/' ? 0 : p.split('/').filter(Boolean).length,
      p,
    }));
  }

  /* Classify a file URL into a coarse display category by its extension.

     The extension is read from the URL with any `?query` / `#fragment`
     removed, so cache-busted links such as "report.pdf?ver=2" still
     classify as "PDF". If that yields no known type, the raw string is
     tried as-is, which keeps links like "download?file=x.docx" working.

     @param {string} url File URL or path.
     @returns {string} One of 'PDF', 'Word', 'Spreadsheet', 'Presentation',
              'Archive', 'Video', 'Audio', 'Image', or 'File' when the
              extension is unknown or `url` is not a non-empty string. */
  function inferFileType(url) {
    if (typeof url !== 'string' || url === '') return 'File';

    /* Map (not a plain object) so extensions like "constructor" cannot
       hit inherited Object.prototype properties. */
    const categories = new Map([
      ['pdf', 'PDF'],
      ['doc', 'Word'], ['docx', 'Word'],
      ['xls', 'Spreadsheet'], ['xlsx', 'Spreadsheet'], ['csv', 'Spreadsheet'],
      ['ppt', 'Presentation'], ['pptx', 'Presentation'],
      ['zip', 'Archive'], ['tar', 'Archive'], ['gz', 'Archive'], ['rar', 'Archive'],
      ['mp4', 'Video'], ['mov', 'Video'], ['avi', 'Video'], ['webm', 'Video'],
      ['mp3', 'Audio'], ['wav', 'Audio'], ['ogg', 'Audio'],
      ['jpg', 'Image'], ['jpeg', 'Image'], ['png', 'Image'], ['gif', 'Image'],
      ['webp', 'Image'], ['svg', 'Image'],
    ]);
    const extensionOf = (s) => (s.split('.').pop() || '').toLowerCase();

    const pathOnly = url.split(/[?#]/)[0];
    return categories.get(extensionOf(pathOnly))
      || categories.get(extensionOf(url))
      || 'File';
  }

  /* Render a byte count as a short human-readable size string.
     Anything that is not a finite, non-negative number yields "—".
     Values under 1024 are shown verbatim in "B"; KB and MB carry one
     decimal place, GB carries two.
     e.g. 2048 → "2.0 KB", 1500000 → "1.4 MB" */
  function formatBytes(bytes) {
    const KB = 1024;
    const MB = KB * 1024;
    const GB = MB * 1024;

    const usable = typeof bytes === 'number' && isFinite(bytes) && bytes >= 0;
    if (!usable) return '—';

    if (bytes >= GB) return `${(bytes / GB).toFixed(2)} GB`;
    if (bytes >= MB) return `${(bytes / MB).toFixed(1)} MB`;
    if (bytes >= KB) return `${(bytes / KB).toFixed(1)} KB`;
    return `${bytes} B`;
  }

  /* Format a scan timestamp for display as "<Mon D, YYYY> · <h:mm am/pm>"
     using the en-US locale in the viewer's local time zone.

     @param {string|number|Date} iso Anything `new Date()` accepts.
     @returns {string|*} The formatted string, or `iso` unchanged when it
              cannot be parsed into a valid date or formatting fails. */
  function formatScanDate(iso) {
    try {
      const d = new Date(iso);
      /* new Date() never throws on unparseable input — it produces an
         Invalid Date whose locale formatters return "Invalid Date". Check
         explicitly so the fallback below is actually reachable. */
      if (Number.isNaN(d.getTime())) return iso;
      const date = d.toLocaleDateString('en-US', { month: 'short', day: 'numeric', year: 'numeric' });
      const time = d.toLocaleTimeString('en-US', { hour: 'numeric', minute: '2-digit' }).toLowerCase();
      return `${date} · ${time}`;
    } catch { return iso; }
  }

  /* ── EMPTY SCAN DATA (pre-scan render) ────────────────────────
     Default `data` value used before any scan has run (and after reset()).
     Array-valued fields are always arrays and nullable scalars are null,
     so tabs can read them without existence checks.

     Fields from `files` onward mirror what normalizeScanData() emits when
     the server response carries no data for them. That includes
     serverBuild/serverVersion, iconLibraries and the extended `contact`
     fields, so pre-scan and post-scan renders see the same keys. */
  const EMPTY_SCAN_DATA = {
    site: '',
    siteUrl: '',
    scannedAt: '',
    scannedAtISO: null,
    scanId: null,
    scanVersion: null,
    pages: 0, images: 0, icons: 0, maps: 0, imageList: [], pagesList: [], pageFetchLimited: false, pageFetchTotal: 0,
    sitemap: [], files: { items: [], totals: {} },
    tech: { platform: 'Unknown', cms: null, confidence: 'low', signals: [], version: null,
            plugins: [], themes: [], hasSSL: true, httpStatus: 0,
            hosting: null, cdn: null, serverSoftware: null,
            poweredBy: null, detectedPlatforms: [], websiteBuilder: null },
    forms: [], integrations: [],
    perPageSeo: [],
    scanConfidence: null,
    wasBlocked: false,
    migrationNotes: [], warnings: [], migrationComplexity: 'Unknown',
    estimate: null, phases: [], allPlatformScores: [],
    brain4Active: false, aiSummary: null,
    plan: null, pageLimit: null, pageLimitHit: false, usedDeepScanCredit: false,
    message: null, disclaimer: null,
    serverBuild: null, serverVersion: null,
    logo: '•', logoUrl: null, ogImage: null, faviconUrl: null,
    primaryEmail: null, primaryEmailLabel: 'Primary Email',
    primaryPhone: null, primaryPhoneLabel: 'Primary Phone',
    colors: [], fonts: [], iconLibraries: [],
    contact: {
      emails: [], phones: [], social: [], socials: [], address: null,
      city_state_zip: [],
      address_components: null,
      logo_url: null,
      image_url: null,
      hours: [],
      business_type: null,
      business_name: null,
      business_description: null,
      price_range: null,
      currencies_accepted: null,
      payment_accepted: null,
      source: 'regex', /* same default normalizeScanData() uses */
      alt_address: null,
      alt_phones: [],
      alt_emails: [],
      locations: [],
    },
    dns: [], seoPages: [], scores: [],
    ada: {
      taxonomy: 'Unknown', deadline: null,
      /* Guarded lookup: `window` is absent outside a browser, and any link
         of WPSB.CONSTANTS.WCAG may be missing if the constants bundle has
         not loaded yet. Either case falls back to the literal. */
      standard: (typeof window !== 'undefined' && window.WPSB?.CONSTANTS?.WCAG?.DISPLAY) || 'WCAG 2.1 AA',
      legalFramework: 'Unknown', contactEmail: null, score: null,
      pdfsFound: 0, totalViolations: 0,
      totals: { pagesScanned: 0, totalViolations: 0, critical: 0, serious: 0, moderate: 0, minor: 0 },
      byPage: [], byRule: [],
      monitoring: { enabled: false, cadence: 'monthly', alertEmail: '' },
    },
    _raw: null,
  };

  /* ── REACT HOOK ──────────────────────────────────────────────── */
  /* Usage in Scanner.jsx:
       const { data, scanning, done, error, scan, reset } = useScannerData();
       scan('example.com');  // kicks off scan
       // Renders based on state: error / done+data / scanning / idle */
  /* ── Session scratchpad (Tier 1 persistence) ────────────────────
     Scan results auto-save to sessionStorage under 'wpsb-scan-current'.
     This is FREE browser storage — survives page navigation within the
     SaaS (e.g. navigating to SEO → Site Builder → back to Scanner keeps
     the last scan intact), but clears when the tab closes.

     Why sessionStorage not localStorage:
     - sessionStorage is tab-scoped. Close tab = gone. Natural expiry.
     - localStorage persists across sessions which would require TTL
       management and user expectation of "my scan from 2 weeks ago".

     Why not Supabase: $$ + bloat. A scan result is 20-200KB JSON. Saving
     every scan would swell the Pro plan's row count. Users save scans
     explicitly via a separate "Save scan" button (Tier 2, TBD).

     Why not IndexedDB: overkill for <1MB of JSON. sessionStorage is
     simpler and has the right expiry semantics. */
  /* sessionStorage key under which the current scan is cached. Shared by
     saveToSession(), loadFromSession() and clearSession(). */
  const SESSION_KEY = 'wpsb-scan-current';
  /* Cache shape version, written into every cached envelope as `v`.
     loadFromSession() discards (and removes) any entry whose `v` differs.

     v1.3.8 patch h5: bumped from 1 → 2. Old cached scans (from before the
     patch g/h server changes) lack logo_url, favicon_url, og_image,
     forms[], sitemap_images, etc. Loading them post-deploy would render
     a Brand Kit with placeholder logos because raw.logo_url is missing
     — exact failure mode the user keeps reporting. Bumping invalidates
     the stale cache and forces a fresh scan. */
  const SESSION_VERSION = 2; /* bump if shape changes, invalidates old cache */

  /* Cache the latest normalized scan for the lifetime of the tab.

     Two best-effort side effects, neither of which may throw:
       1. Stamp the data on `window.__wpsbScanData` (v1.3.8 patch h5) so it
          can be inspected from the console. Only the last scan is kept.
       2. Write `{ v, ts, data }` as JSON to sessionStorage under
          SESSION_KEY, unless the serialized form exceeds 4,000,000
          characters (sessionStorage quota is ~5MB in most browsers).

     @param {object} normalized Output of normalizeScanData(). */
  function saveToSession(normalized) {
    try {
      if (typeof window !== 'undefined') window.__wpsbScanData = normalized;
    } catch (stampErr) { /* swallow */ }

    const canWarn = typeof console !== 'undefined';
    try {
      if (typeof sessionStorage === 'undefined') return;

      const payload = JSON.stringify({ v: SESSION_VERSION, ts: Date.now(), data: normalized });
      if (payload.length <= 4_000_000) {
        sessionStorage.setItem(SESSION_KEY, payload);
        return;
      }
      /* Over the soft cap — skip caching so a giant scan can't brick the feature. */
      if (canWarn) console.warn('[WPSB Scanner] Scan too large for sessionStorage, skipping cache');
    } catch (err) {
      /* Private browsing or quota exceeded — the scan itself is still usable. */
      if (canWarn) console.warn('[WPSB Scanner] sessionStorage save failed:', err.message);
    }
  }

  /* Read the cached scan back out of sessionStorage.

     @returns {*} The `data` member of the cached envelope when an entry
              exists and its `v` equals SESSION_VERSION; otherwise null.
              A stale or falsy envelope is removed from storage. Any
              exception (storage unavailable, corrupt JSON) yields null. */
  function loadFromSession() {
    try {
      if (typeof sessionStorage === 'undefined') return null;

      const stored = sessionStorage.getItem(SESSION_KEY);
      if (!stored) return null;

      const envelope = JSON.parse(stored);
      const isCurrent = Boolean(envelope) && envelope.v === SESSION_VERSION;
      if (isCurrent) return envelope.data;

      sessionStorage.removeItem(SESSION_KEY); /* stale shape */
      return null;
    } catch (err) {
      return null;
    }
  }

  /* Drop the cached scan from sessionStorage. Never throws: a missing or
     inaccessible storage object is silently ignored. */
  function clearSession() {
    try {
      if (typeof sessionStorage === 'undefined') return;
      sessionStorage.removeItem(SESSION_KEY);
    } catch (err) { /* ignore */ }
  }

  /* ── Saved scans (Tier 2 persistence) ─────────────────────────
     Explicit opt-in save to Supabase via Railway endpoints.
     Users build their library of scanned sites they want to keep.

     Plan limits (enforced server-side; POST answers 403 with code
     PLAN_LIMIT_SAVED_SCANS once the limit is reached):
       Starter     — 10 saved scans
       Growth      — 50 saved scans
       Agency      — 250 saved scans
       Enterprise  — unlimited (hard cap 10000 to prevent abuse)

     Rescan dedup: if user saves a scan for a site they've already saved,
     the new scan REPLACES the old one (same account_id + site pair).
     This is done server-side via UPSERT ON CONFLICT — client just calls
     POST and lets the backend handle the merge. This:
       1. Gives users "more space" effectively (scans auto-refresh)
       2. Prevents DB bloat from repeated scans of the same client
       3. Means "plan limit" = "number of distinct sites", not scan count

     Auto-expire: nightly Railway cron purges scans older than 90 days
     unless flagged as "permanent" (future Phase 2 feature).

     Required Supabase schema (run manually):
       CREATE TABLE saved_scans (
         id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
         account_id      UUID REFERENCES accounts(id),
         site            TEXT NOT NULL,
         site_url        TEXT NOT NULL,
         scan_data       JSONB NOT NULL,
         page_count      INT,
         image_count     INT,
         is_permanent    BOOLEAN DEFAULT FALSE,
         notes           TEXT,
         created_at      TIMESTAMPTZ DEFAULT NOW(),
         updated_at      TIMESTAMPTZ DEFAULT NOW(),
         UNIQUE(account_id, site)  -- enforces rescan dedup
       );
       CREATE INDEX idx_saved_scans_account ON saved_scans(account_id, updated_at DESC);

     Required Railway endpoints:
       GET  /scans/saved           — list user's saved scans
       POST /scans/saved           — save (upsert by site)
       DELETE /scans/saved/:id     — delete one
       GET  /scans/saved/:id       — hydrate a saved scan back into Scanner
  */

  /* Persist a scan to the user's saved-scan library (POST /scans/saved).

     @param {object} data  Normalized scan; must have a truthy `site`.
     @param {object} [opts]
     @param {string} [opts.notes] Free-text note stored with the scan.
     @returns {Promise<object>} Never rejects for request failures. Resolves to
       { success: true, data, replaced }              on success (`replaced`
                                                      is true only when the
                                                      response says so);
       { success: false, error }                      no data / not signed in /
                                                      other HTTP or network error;
       { success: false, error, notDeployed: true }   on HTTP 404;
       { success: false, error, code: 'PLAN_LIMIT', limit, used }
                                                      on 403 PLAN_LIMIT_SAVED_SCANS. */
  async function saveScanPermanent(data, opts = {}) {
    const hasSite = Boolean(data && data.site);
    if (!hasSite) {
      return { success: false, error: 'No scan data to save' };
    }

    const authToken = (typeof window !== 'undefined' && window.WPSB?.getToken?.()) || null;
    if (!authToken) {
      return { success: false, error: 'Not signed in' };
    }

    try {
      const endpoint = `${RAILWAY_URL}/scans/saved`;
      const payload = JSON.stringify({
        site: data.site,
        site_url: data.siteUrl,
        scan_data: data,
        page_count: data.pages || 0,
        image_count: data.images || 0,
        notes: opts.notes || null,
      });
      const response = await fetch(endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': 'Bearer ' + authToken,
        },
        body: payload,
      });

      switch (response.status) {
        case 404:
          return { success: false, error: 'Saved scans endpoint not deployed yet', notDeployed: true };
        case 403: {
          /* Body may be missing or non-JSON — treat that as an empty object. */
          const denial = await response.json().catch(() => ({}));
          if (denial.code === 'PLAN_LIMIT_SAVED_SCANS') {
            return { success: false, error: 'Plan limit reached', code: 'PLAN_LIMIT', limit: denial.limit, used: denial.used };
          }
          return { success: false, error: denial.error || 'Permission denied' };
        }
        default:
          break;
      }

      if (!response.ok) {
        const bodyText = await response.text().catch(() => 'Unknown error');
        return { success: false, error: bodyText.substring(0, 200) };
      }

      const saved = await response.json();
      return { success: true, data: saved, replaced: saved.replaced === true };
    } catch (err) {
      return { success: false, error: err.message };
    }
  }

  /* Fetch the signed-in user's saved scans (GET /scans/saved).

     @returns {Promise<object>} Never rejects for request failures. Always
       carries an `items` array:
       { success: true, items, plan, used, limit }           on success;
       { success: false, error, items: [] }                  not signed in,
                                                             non-2xx, or network error;
       { success: false, error, notDeployed: true, items: [] } on HTTP 404. */
  async function listSavedScans() {
    const authToken = (typeof window !== 'undefined' && window.WPSB?.getToken?.()) || null;
    if (!authToken) {
      return { success: false, error: 'Not signed in', items: [] };
    }

    try {
      const response = await fetch(`${RAILWAY_URL}/scans/saved`, {
        method: 'GET',
        headers: { 'Authorization': 'Bearer ' + authToken },
      });

      if (response.status === 404) {
        return { success: false, error: 'Not deployed yet', notDeployed: true, items: [] };
      }
      if (!response.ok) {
        return { success: false, error: `HTTP ${response.status}`, items: [] };
      }

      const listing = await response.json();
      return {
        success: true,
        items: listing.items || [],
        plan: listing.plan,
        used: listing.used,
        limit: listing.limit,
      };
    } catch (err) {
      return { success: false, error: err.message, items: [] };
    }
  }

  /* Delete one saved scan (DELETE /scans/saved/:id).

     @param {string} id Saved-scan id; URI-encoded before being placed in the path.
     @returns {Promise<{success: boolean, error?: string}>} Never rejects for
              request failures. `error` is 'Not signed in', 'HTTP <status>',
              or the thrown error's message. */
  async function deleteSavedScan(id) {
    const authToken = (typeof window !== 'undefined' && window.WPSB?.getToken?.()) || null;
    if (!authToken) {
      return { success: false, error: 'Not signed in' };
    }

    try {
      const endpoint = `${RAILWAY_URL}/scans/saved/${encodeURIComponent(id)}`;
      const response = await fetch(endpoint, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer ' + authToken },
      });
      return response.ok
        ? { success: true }
        : { success: false, error: `HTTP ${response.status}` };
    } catch (err) {
      return { success: false, error: err.message };
    }
  }

  /* Fetch one saved scan so it can be hydrated back into the Scanner
     (GET /scans/saved/:id).

     @param {string} id Saved-scan id; URI-encoded before being placed in the path.
     @returns {Promise<object>} Never rejects for request failures. Resolves to
              { success: true, data } where `data` is the response's
              `scan_data`, or { success: false, error } with 'Not signed in',
              'HTTP <status>', or the thrown error's message. */
  async function loadSavedScan(id) {
    const authToken = (typeof window !== 'undefined' && window.WPSB?.getToken?.()) || null;
    if (!authToken) {
      return { success: false, error: 'Not signed in' };
    }

    try {
      const endpoint = `${RAILWAY_URL}/scans/saved/${encodeURIComponent(id)}`;
      const response = await fetch(endpoint, {
        method: 'GET',
        headers: { 'Authorization': 'Bearer ' + authToken },
      });
      if (!response.ok) {
        return { success: false, error: `HTTP ${response.status}` };
      }
      const record = await response.json();
      return { success: true, data: record.scan_data };
    } catch (err) {
      return { success: false, error: err.message };
    }
  }

  /* ── React hook: useScannerData ────────────────────────────────── */
  /* React hook exposing scanner state plus scan()/reset() actions.

     @returns {{data: object, scanning: boolean, done: boolean, error: *,
                scan: Function, reset: Function}}
       data     — normalized scan; EMPTY_SCAN_DATA until a scan completes
                  or a cached scan is found in sessionStorage
       scanning — true from scan() start until scanSite() settles
       done     — true once `data` is renderable (including a cache hit)
       error    — `error` from the last failed scan result, else null
       scan(rawUrl, scanOptions?) — resolves to { success: true, data } with
                  the INITIAL (pre-enrichment) normalized data, or to
                  { success: false, error }. Enrichment keeps updating
                  `data` in the background after it resolves.
       reset()  — restore the empty state and clear the session cache */
  function useScannerData() {
    /* Hydrate from sessionStorage on first render. If present, user
       sees their last scan immediately — no "scan again" friction.
       Read through a lazy initializer so loadFromSession() (a JSON.parse
       of up to several MB) runs once per mount rather than on every
       re-render — streaming enrichment below triggers several. */
    const [cached] = useState(loadFromSession);
    const [data, setData]         = useState(cached || EMPTY_SCAN_DATA);
    const [scanning, setScanning] = useState(false);
    const [done, setDone]         = useState(!!cached);  /* cached = ready */
    const [error, setError]       = useState(null);

    const scan = useCallback(async (rawUrl, scanOptions) => {
      setScanning(true); setError(null); setDone(false);
      if (window.announce) window.announce('Scan started');

      /* v1.3.8 patch h3: scanOptions from the configurator decides
         enrichment behaviour:
           - opts.multipage: false → skip enrichPagesData entirely
                                     (homepage-only scan, fastest)
           - opts.multipage: true  → walk up to PAGE_FETCH_CAP pages
                                     (default 100, raised by patch h2)
           - opts.contact / seo / maps / icons → reserved for future
             granular toggling; currently brand enrichment runs as a
             single bundle. When unchecked, those tab counts will show
             empty-state instead of populated data. */
      const useOptions = scanOptions || {
        modes: { brand: true, platform: false, ada: false },
        opts: { multipage: true, contact: true, seo: true, maps: true, icons: true },
      };

      /* Failures normally arrive as { success: false }. If scanSite ever
         rejects instead, fold that into the same failure path so
         `scanning` cannot get stuck at true. */
      let result;
      try {
        result = await scanSite(rawUrl, useOptions);
      } catch (e) {
        console.warn('[WPSB] scanSite threw:', e && e.message);
        result = {
          success: false,
          error: { code: 'NETWORK', message: (e && e.message) || 'Scan failed.' },
        };
      }

      setScanning(false);

      if (!result.success) {
        setError(result.error);
        setDone(false);
        if (window.wpsbToast) {
          const code = result.error?.code;
          const msg = code === 'PLAN_LIMIT_SCANS'
            ? 'Monthly scan limit reached.'
            : code === 'PAYMENT_FAILED'
              ? 'Billing issue — update payment to continue.'
              : code === 'SCAN_RATE_LIMITED'
                ? 'Please wait before scanning again.'
                : (result.error?.message || 'Scan failed.');
          window.wpsbToast(msg, 'warn');
        }
        return { success: false, error: result.error };
      }

      /* v1.3.8 patch h2: stage rendering.

         Old flow:
           1. /brain/scan returns
           2. setScanning(false)  ← progress hits 100%
           3. await enrichBrandData    (~1-2s)
           4. await enrichPagesData   (~10-20s, was sequential)
           5. await enrichDnsData     (~1s)
           6. setData() + setDone()   ← user finally sees results
         User saw progress bar finish, then a 2-15s blank gap.

         New flow:
           1. /brain/scan returns
           2. Normalize & render immediately — view-results loads now
           3. setDone(true) — user sees content
           4. Three enrichments run IN PARALLEL after first paint:
                Promise.all([enrichBrandData, enrichPagesData, enrichDnsData])
           5. As each completes, re-normalize and stream-update setData()
              so colors/fonts, additional pages, DNS records, and forms
              progressively appear in the UI without blocking the initial render.
         User sees the scan view immediately and watches it fill in. */

      /* First render — server-only data, normalized and shown right away */
      const initialEnriched = result.data;
      const initialNormalized = normalizeScanData(initialEnriched);
      setData(initialNormalized);
      setDone(true);
      saveToSession(initialNormalized);
      if (window.wpsbToast) {
        window.wpsbToast(`Scan complete — enriching results…`, 'ok');
      }

      /* Background enrichments — kick off all three in parallel.
         Each writes back into the same `enriched` reference, then we
         re-normalize and setData() so React rerenders. Errors in one
         enrichment don't block the others (each catches and warns).

         v1.3.8 patch h3: multipage flag gates enrichPagesData. When the
         user unchecked "Multi-page (up to 300)" in the configurator,
         the enrichment runs in homepage-only mode (cap=1) so we still
         get forms/content from the landing page but skip the deeper
         walk. Other toggles (contact, seo, maps, icons) are reserved
         for future granular gating.

         NOTE(review): nothing here cancels or ignores these callbacks if
         reset() or a second scan() happens while they are in flight, so a
         late enrichment can overwrite newer state — worth confirming
         whether the UI prevents that. */
      const enriched = initialEnriched;
      const url = result.data.site_url || rawUrl;
      const enrichOpts = (useOptions && useOptions.opts) || {};

      const brandPromise = enrichBrandData(enriched, url)
        .catch(e => { console.warn('[WPSB] brand enrichment failed:', e.message); return enriched; })
        .then(() => {
          /* Re-normalize and stream-update with brand data */
          try { setData(normalizeScanData(enriched)); } catch (e) { console.warn('[WPSB] re-normalize after brand failed:', e.message); }
        });

      /* Pages walker. When opts.multipage is false, restrict to
         homepage only (PAGE_FETCH_CAP=1). When true, use the full cap.
         Either way, runs forms detection on whatever pages it walks. */
      const pagesCap = enrichOpts.multipage === false ? 1 : PAGE_FETCH_CAP;
      const pagesPromise = enrichPagesData(enriched, url, (p) => {
        if (typeof window !== 'undefined' && window.wpsbScanProgress) {
          window.wpsbScanProgress({ kind: 'page', ...p });
        }
      }, { cap: pagesCap })
        .catch(e => { console.warn('[WPSB] pages enrichment failed:', e.message); return enriched; })
        .then(() => {
          try { setData(normalizeScanData(enriched)); } catch (e) { console.warn('[WPSB] re-normalize after pages failed:', e.message); }
        });

      const dnsPromise = enrichDnsData(enriched, url)
        .catch(e => { console.warn('[WPSB] DNS enrichment failed:', e.message); return enriched; })
        .then(() => {
          try { setData(normalizeScanData(enriched)); } catch (e) { console.warn('[WPSB] re-normalize after dns failed:', e.message); }
        });

      /* Final settle — once all three resolve, save the fully-enriched
         payload to sessionStorage so a navigate-away/back retains the
         complete scan, not just the initial render. Deliberately not
         awaited: scan() resolves as soon as the initial data is rendered. */
      Promise.all([brandPromise, pagesPromise, dnsPromise]).then(() => {
        try {
          const finalNormalized = normalizeScanData(enriched);
          saveToSession(finalNormalized);
          if (window.wpsbToast) {
            window.wpsbToast(
              `Enrichment done — ${finalNormalized.pages} page${finalNormalized.pages === 1 ? '' : 's'} scanned`,
              'ok'
            );
          }
        } catch (e) {
          console.warn('[WPSB] final settle failed:', e.message);
        }
      });

      return { success: true, data: initialNormalized };
    }, []);

    const reset = useCallback(() => {
      setData(EMPTY_SCAN_DATA);
      setScanning(false); setDone(false); setError(null);
      clearSession(); /* Clear the sessionStorage cache too */
    }, []);

    return { data, scanning, done, error, scan, reset };
  }

  /* ── SCAN HANDOFF helpers ──────────────────────────────────────
     Public API for target pages (SEO workspace, Site Builder, etc.)
     to read scan data that the Scanner pushed via sessionStorage.

     Usage in target page's module:
       const handoff = WPSB_Scanner.getScanHandoff('seo');
       if (handoff) {
         // pre-fill form with handoff.site, handoff.pages, etc.
         WPSB_Scanner.clearScanHandoff('seo'); // one-shot consume
       }

     Keys:
       'wpsb-seo-target'          — SEO workspace handoff
       'wpsb-sitebuilder-prefill' — Site Builder handoff

     Returns null if nothing available, or if payload is older than
     1 hour (stale — user probably navigated via browser back). */
  /* Short target name → sessionStorage key holding that target's payload. */
  const HANDOFF_KEYS = {
    seo: 'wpsb-seo-target',
    sitebuilder: 'wpsb-sitebuilder-prefill',
  };
  /* Payloads older than this are treated as stale and discarded. */
  const HANDOFF_MAX_AGE_MS = 60 * 60 * 1000; /* 1 hour */

  /* getScanHandoff(target)
       target — 'seo' | 'sitebuilder' (a key of HANDOFF_KEYS)
     Returns the parsed payload stored for that target, or null when:
       • the target is unknown or sessionStorage is unavailable,
       • nothing is stored, or the stored value has no truthy `timestamp`,
       • the payload is older than HANDOFF_MAX_AGE_MS (it is also removed),
       • anything throws (e.g. the stored text is not valid JSON).
     Does not consume the payload — callers follow up with
     clearScanHandoff(target) for one-shot semantics. */
  function getScanHandoff(target) {
    try {
      const storageKey = HANDOFF_KEYS[target];
      const storageMissing = typeof sessionStorage === 'undefined';
      if (!storageKey || storageMissing) return null;

      const serialized = sessionStorage.getItem(storageKey);
      const payload = serialized ? JSON.parse(serialized) : null;
      if (!payload || !payload.timestamp) return null;

      /* Kept as a "greater than" test on purpose: a timestamp that does
         not subtract to a number yields NaN, which compares false here,
         so such a payload is returned rather than purged. */
      const isStale = Date.now() - payload.timestamp > HANDOFF_MAX_AGE_MS;
      if (!isStale) return payload;

      sessionStorage.removeItem(storageKey);
      return null;
    } catch (e) {
      return null;
    }
  }

  /* clearScanHandoff(target)
       target — 'seo' | 'sitebuilder' (a key of HANDOFF_KEYS)
     Removes the stored handoff payload for that target. Unknown targets,
     a missing sessionStorage, and storage errors are all silent no-ops.
     Always returns undefined. */
  function clearScanHandoff(target) {
    try {
      const storageKey = HANDOFF_KEYS[target];
      const canClear = Boolean(storageKey) && typeof sessionStorage !== 'undefined';
      if (canClear) sessionStorage.removeItem(storageKey);
    } catch (e) { /* ignore */ }
  }

  /* ── useScannerInfo ────────────────────────────────────────────────
     React hook that fetches /scanner/info once on mount and caches it
     for the session. Returns { info, loading, error } where info is the
     full scanner identity payload (ip, user_agent, hostname,
     allowlist_guides, notes). Used by:
       - Onboarding wizard (show user the IP they need to allowlist BEFORE
         their first scan, so high-security sites work first try)
       - Low-confidence card "Allowlist tab" (in-context allowlist help
         when a scan got blocked)
       - Settings page → Integrations section (reference for support
         tickets and ongoing site management)

     Cached in module-level memory (closure variables inside this IIFE)
     rather than sessionStorage so a fresh fetch happens once per page
     load (Railway's IP can change on redeploy and we want fresh data
     on each session). */
  /* Resolved /scanner/info payload, shared by every hook instance. */
  let _scannerInfoCache = null;
  /* In-flight (or settled) fetch, so concurrent mounts share one request. */
  let _scannerInfoPromise = null;

  /* useScannerInfo()
     Returns { info, loading, error }:
       info    — the /scanner/info payload, or null until it arrives
       loading — true until the shared request settles
       error   — null, or the failure message as a string
     The request is issued at most once; later mounts reuse the cached
     payload or attach to the shared promise. */
  function useScannerInfo() {
    const { useState, useEffect } = React;
    const [info, setInfo] = useState(_scannerInfoCache);
    const [loading, setLoading] = useState(!_scannerInfoCache);
    const [error, setError] = useState(null);

    useEffect(() => {
      if (_scannerInfoCache) {
        /* The cache can be filled by another component between this
           component's first render (info=null, loading=true) and this
           effect. Sync state here, otherwise the hook would stay stuck
           in the loading state. When state already matches, these are
           no-op updates. */
        setInfo(_scannerInfoCache);
        setLoading(false);
        return;
      }
      if (!_scannerInfoPromise) {
        /* First caller starts the request; everyone else piggybacks. */
        _scannerInfoPromise = fetch(RAILWAY_URL + '/scanner/info')
          .then(r => r.ok ? r.json() : Promise.reject(new Error('HTTP ' + r.status)))
          .then(data => {
            _scannerInfoCache = data;
            return data;
          });
      }
      /* Single subscription path so `error` is always a message string,
         whether this instance started the request or joined it. */
      _scannerInfoPromise
        .then(setInfo)
        .catch(e => setError(e && e.message ? e.message : String(e)))
        .finally(() => setLoading(false));
    }, []);

    return { info, loading, error };
  }

  /* ── ADA COMPLIANCE SCORE DERIVATION ─────────────────────────────
     Single source of truth for the 0-100 compliance score used in
     audit reports (ADAReport.jsx), estimator quotes (Estimator.jsx),
     and ProposalCart tokens. Added v1.1.0 (May 13 2026) per the
     CD Compliance Framework data model.

     Formula:
       score = 100 - (critical * 7) - (warning * 2) - (suggestion * 0.3)
       Floored at 0, rounded to integer.

     Severity bucket mapping (handles BOTH axe-core impact tags AND
     CD custom-checks wpsb_severity values):
       axe 'critical' | 'serious'  → critical
       axe 'moderate'              → warning
       axe 'minor'                 → suggestion
       custom wpsb_severity values → use as-is (critical/warning/suggestion)
  */
  /* categorizeViolation(v)
       v — a violation object (axe-core result or CD custom-check result)
     Returns one of 'critical' | 'warning' | 'suggestion'.
     A missing violation, or one with no recognised severity, is treated
     as 'warning'. */
  function categorizeViolation(v) {
    if (!v) return 'warning';

    /* CD custom rules already carry a bucket — trust it when it is one
       of the three known values. */
    const customBucket = v.wpsb_severity;
    const isKnownBucket =
      customBucket === 'critical' ||
      customBucket === 'warning' ||
      customBucket === 'suggestion';
    if (customBucket && isKnownBucket) return customBucket;

    /* Otherwise translate the axe-core impact tag. */
    const impact = v.impact;
    if (impact === 'critical' || impact === 'serious') return 'critical';
    if (impact === 'minor') return 'suggestion';
    /* 'moderate' and anything unrecognised */
    return 'warning';
  }

  /* deriveComplianceScore(violations)
       violations — array of violation objects
     Returns an integer 0–100. A non-array or empty input scores 100.
     Each violation is bucketed via categorizeViolation() and weighted
     7 / 2 / 0.3 for critical / warning / suggestion. */
  function deriveComplianceScore(violations) {
    const nothingToScore = !Array.isArray(violations) || violations.length === 0;
    if (nothingToScore) return 100;

    const tally = violations.reduce(function(acc, violation) {
      const bucket = categorizeViolation(violation);
      if (acc[bucket] !== undefined) acc[bucket]++;
      return acc;
    }, { critical: 0, warning: 0, suggestion: 0 });

    /* Subtraction order matches the documented formula term by term. */
    const rawScore = 100 - (tally.critical * 7) - (tally.warning * 2) - (tally.suggestion * 0.3);
    const rounded = Math.round(rawScore);
    return rounded < 0 ? 0 : rounded;
  }

  /* Convenience: full breakdown so consumers (report, estimator)
     can render both the score AND the counts that produced it. */
  /* deriveComplianceBreakdown(violations)
       violations — array of violation objects (a non-array counts as empty)
     Returns { score, critical, warning, suggestion, total } where score
     comes from deriveComplianceScore() and total is the array length. */
  function deriveComplianceBreakdown(violations) {
    const list = Array.isArray(violations) ? violations : [];

    const tally = list.reduce(function(acc, violation) {
      const bucket = categorizeViolation(violation);
      if (acc[bucket] !== undefined) acc[bucket]++;
      return acc;
    }, { critical: 0, warning: 0, suggestion: 0 });

    const { critical, warning, suggestion } = tally;
    return {
      score: deriveComplianceScore(list),
      critical,
      warning,
      suggestion,
      total: list.length,
    };
  }

  /* ── ADA REMEDIATION ESTIMATOR ───────────────────────────────────
     Maps violations to estimated remediation minutes for the
     Estimator quote auto-gen flow. Defaults are conservative
     averages from the Konza FQHC project; specific rules in
     cd-scan-rules.json (when loaded) override these via the
     `estimated_remediation_minutes` field per rule.
     Added v1.1.0 (May 13 2026).
  */
  /* Fallback minutes per failing element, keyed by rule ID.
     estimateRemediationMinutes() looks a violation's `id` up here (exact
     match) when the optional rule catalog produced no non-zero estimate,
     then multiplies by the violation's node count (capped at 10).
     Rules absent from this table fall through to a severity-band default.
     NOTE(review): the inline descriptions for lnk-001 / lnk-004 and for
     lnk-002 / lnk-006 read like the same check with different minutes —
     confirm against cd-scan-rules.json which description is current. */
  var DEFAULT_REMEDIATION_MINUTES = {
    /* axe-core common rules */
    'image-alt':                  10,
    'color-contrast':             15,
    'link-name':                   8,
    'button-name':                 8,
    'heading-order':              20,
    'html-has-lang':               2,
    'document-title':              5,
    'duplicate-id':               12,
    'duplicate-id-aria':          12,
    'aria-allowed-attr':          15,
    'aria-roles':                 15,
    'aria-valid-attr':            10,
    'select-name':                 8,
    'label':                       8,
    'region':                     25,
    'landmark-one-main':          15,
    'bypass':                     20,
    'frame-title':                 5,
    'empty-heading':               4,
    'link-in-text-block':         10,
    'meta-viewport':               2,
    'meta-refresh':                3,
    'identical-links-same-purpose': 12,
    'scrollable-region-focusable': 15,
    /* CD custom-rule defaults — match cd-scan-rules.json IDs */
    'lnk-001':                     1,   /* internal link target=_blank (v1.2.0) */
    'lnk-002':                     1,   /* external missing rel=noopener (v1.2.0) */
    'lnk-003':                     5,   /* PDF icon on non-PDF link */
    'lnk-004':                     3,   /* internal link target=_blank */
    'lnk-005':                     3,   /* uppercase URL slug (v1.2.0) */
    'lnk-006':                     2,   /* missing rel=noopener */
    'lnk-007':                     2,   /* weak link text — "click here" (v1.2.0) */
    'lnk-008':                     1,   /* PDF link missing indicator (v1.2.0) */
    'lnk-009':                     5,   /* fake link — span/div with role=link, no href (v1.1.0) */
    'lng-001':                     6,   /* Spanish block missing lang */
    'lng-002':                     4,   /* missing Spanish accents — conservative (v1.5.0) */
    'lng-003':                     3,   /* untranslated English in Spanish content (v1.5.0) */
    'sem-001':                    15,   /* duplicate IDs on page */
    'sem-002':                     3,   /* multiple H1 elements (v1.3.0) */
    'sem-003':                     5,   /* skipped heading levels — axe passthrough (v1.3.0) */
    'sem-004':                     3,   /* duplicate heading text at same level (v1.3.0) */
    'sem-007':                     8,   /* orphan closing tag */
    'sem-009':                     3,   /* multiple nav without aria-label (v1.3.0) */
    'sem-010':                     3,   /* broken HTML tags <nr>, <l>, etc. (v1.1.0) */
    'sem-011':                     2,   /* excessive consecutive <br> tags (v1.3.0) */
    'sem-012':                     3,   /* markdown leakage in HTML body (v1.3.0) */
    'sem-013':                     2,   /* ALL CAPS body text (v1.3.0) */
    'inl-001':                     2,   /* inline color span wrapping link (v1.4.0) */
    'inl-002':                     3,   /* inline style on link/p/span/div (v1.3.0) */
    'inl-003':                     5,   /* repeated inline style on siblings (v1.4.0) */
    'inl-005':                     2,   /* redundant color on wrapper+child link (v1.4.0) */
    'inl-006':                     2,   /* Word/Pages class artifacts (v1.4.0) */
    'inl-007':                     1,   /* <b> instead of <strong> (v1.4.0) */
    'lnk-010':                    15,   /* year-dated URL for evergreen content (v1.4.0) */
    'lnk-011':                     2,   /* URL tracking parameters (v1.4.0) */
    'lnk-012':                     1,   /* leading space in href (v1.4.0) */
    'tel-006':                     3,   /* plain-text phone, no tel: wrapper (v1.4.0) */
    'tel-007':                     1,   /* phone display uses periods (v1.4.0) */
    'mail-001':                    2,   /* plain-text email, no mailto: wrapper (v1.4.0) */
    'tel-001':                     1,   /* tel: missing +1 country code (v1.2.0) */
    'tel-002':                     1,   /* tel: contains formatting chars (v1.2.0) */
    'tel-003':                     1,   /* extension in display not href (v1.2.0) */
    'tel-004':                     1,   /* phone link missing aria-label (v1.2.0) */
    'tel-005':                     3,   /* phone link digit mismatch — wrong-number bug (v1.1.0) */
    'ada-001':                     8,   /* text contrast under 4.5:1 (WCAG AA) */
    'ada-002':                     8,   /* color as sole indicator — heuristic (v1.5.0) */
    'ada-003':                    10,   /* focus indicator removed — partial (v1.5.0) */
    'ada-004':                     5,   /* touch target under 44x44 — best-practice AAA, server-side (v1.6.0) */
    'ada-005':                    10,   /* image missing alt attribute */
    'ada-007':                     3,   /* social icon class leak (v1.5.0) */
    'ada-008':                     4,   /* aria-label Label in Name mismatch (v1.1.0) */
    'ada-009':                     6,   /* conflicting ARIA attributes — WCAG 4.1.2 (v1.1.1) */
    'ada-010':                     1,   /* redundant aria-label exactly matches visible text (v1.1.1) */
    'ada-011':                     3,   /* image link accessible-name stutter — WCAG 1.1.1/2.4.4 (v1.1.3) */
    'ada-012':                     2,   /* <label> nested inside <textarea> — AI bad fix (v1.1.4) */
    'mail-002':                    1,   /* outdated "user at domain dot com" aria-label on mailto (v1.1.3) */
    'hc-001':                      5,   /* mental health content missing 988 (v1.1.0, FQHC priority) */
  };

  /* estimateRemediationMinutes(violations, ruleCatalog)
       violations  — array of violation objects; each is read for `id`,
                     `help`, `nodeCount` / `nodes`, and its severity.
                     Null/undefined entries are tolerated and counted as
                     an 'unknown' rule, matching categorizeViolation().
       ruleCatalog — optional { rules: [{ rule_id,
                     estimated_remediation_minutes }] }; a positive finite
                     estimate here overrides the built-in default.
     Returns {
       total_minutes, total_hours (1 decimal),
       per_rule: [{ rule_id, label, severity, minutes, count, source }]
                 sorted by minutes descending,
       fallback_count: violations priced by severity band only
     }.
     Lookup order per violation: catalog → DEFAULT_REMEDIATION_MINUTES →
     severity band (critical 15 / warning 8 / suggestion 4). A catalog
     estimate of 0 is treated as "no estimate". */
  function estimateRemediationMinutes(violations, ruleCatalog) {
    if (!Array.isArray(violations) || violations.length === 0) {
      return { total_minutes: 0, total_hours: 0, per_rule: [], fallback_count: 0 };
    }

    var hasOwn = Object.prototype.hasOwnProperty;

    /* Index the catalog once instead of scanning it per violation.
       The first entry for a rule_id wins, as Array#find would. */
    var catalogByRule = new Map();
    if (ruleCatalog && Array.isArray(ruleCatalog.rules)) {
      ruleCatalog.rules.forEach(function(r) {
        if (r && !catalogByRule.has(r.rule_id)) catalogByRule.set(r.rule_id, r);
      });
    }

    /* Prototype-less accumulator so rule IDs such as 'constructor' or
       '__proto__' are ordinary keys rather than inherited members. */
    var perRule = Object.create(null);
    var fallbackCount = 0;

    violations.forEach(function(v) {
      var violation = v || {};   /* tolerate null/undefined entries */
      var ruleId = violation.id || 'unknown';
      var minutes = 0;
      var source = 'default';

      /* Look up rule catalog first if provided */
      var match = catalogByRule.get(ruleId);
      var catalogMinutes = match ? match.estimated_remediation_minutes : undefined;
      if (Number.isFinite(catalogMinutes) && catalogMinutes > 0) {
        minutes = catalogMinutes;
        source = 'catalog';
      }

      /* Fall back to defaults — own keys only, never Object.prototype */
      if (minutes === 0 && hasOwn.call(DEFAULT_REMEDIATION_MINUTES, ruleId) && DEFAULT_REMEDIATION_MINUTES[ruleId]) {
        minutes = DEFAULT_REMEDIATION_MINUTES[ruleId];
        source = 'default';
      }

      /* Last-resort fallback by severity */
      if (minutes === 0) {
        var bucket = categorizeViolation(v);
        minutes = bucket === 'critical' ? 15 : bucket === 'warning' ? 8 : 4;
        source = 'severity_fallback';
        fallbackCount++;
      }

      /* Per-node multiplier: each failing element typically needs its own fix */
      var nodeCount = violation.nodeCount || (violation.nodes ? violation.nodes.length : 1) || 1;
      var totalForRule = minutes * Math.min(nodeCount, 10);  /* cap at 10× to prevent runaway estimates */

      if (perRule[ruleId]) {
        /* Label, severity and source stay as recorded for the first
           violation seen for this rule. */
        perRule[ruleId].minutes += totalForRule;
        perRule[ruleId].count   += 1;
      } else {
        perRule[ruleId] = {
          rule_id:   ruleId,
          label:     violation.help || ruleId,
          severity:  categorizeViolation(v),
          minutes:   totalForRule,
          count:     1,
          source:    source,
        };
      }
    });

    var perRuleArray = Object.values(perRule).sort(function(a, b) { return b.minutes - a.minutes; });
    var totalMinutes = perRuleArray.reduce(function(sum, r) { return sum + r.minutes; }, 0);

    return {
      total_minutes:  totalMinutes,
      total_hours:    Math.round((totalMinutes / 60) * 10) / 10,
      per_rule:       perRuleArray,
      fallback_count: fallbackCount,
    };
  }

  /* Generate ProposalCart items from violations + estimate.
     Used by Estimator's "Import ADA findings" button to populate
     the cart with one line item per rule (not per violation, to
     avoid 30-row carts). Each item carries the rule_id, count,
     estimated hours, severity for UI rendering. */
  /* generateAdaCartItems(violations, ruleCatalog, hourlyRate)
       violations  — array of violation objects
       ruleCatalog — optional catalog passed through to
                     estimateRemediationMinutes()
       hourlyRate  — rate used for pricing; falsy values fall back to 125
     Returns one cart item per rule. Hours are rounded to 0.1, with a
     floor of 0.1 for any rule that has work to do: without the floor a
     1–2 minute fix rounds to 0.0 hr and lands in the cart as a
     "$0 / Estimated 0 hr" line. highPrice adds a 40% buffer on lowPrice's
     basis. */
  function generateAdaCartItems(violations, ruleCatalog, hourlyRate) {
    var rate = hourlyRate || 125;  /* default CD rate; client can override */
    var MIN_BILLABLE_HOURS = 0.1;  /* smallest increment the 1-decimal rounding can show */
    var est = estimateRemediationMinutes(violations, ruleCatalog);
    return est.per_rule.map(function(r) {
      var hours = Math.round((r.minutes / 60) * 10) / 10;
      if (r.minutes > 0 && hours < MIN_BILLABLE_HOURS) hours = MIN_BILLABLE_HOURS;
      var lowPrice  = Math.round(hours * rate);
      var highPrice = Math.round(hours * rate * 1.4);  /* +40% complexity buffer */
      return {
        /* All items from one call share the timestamp; rule_id keeps them distinct */
        id:           'ada-' + r.rule_id + '-' + Date.now(),
        source:       'ada-scan',
        category:     'remediation',
        label:        r.label + ' (' + r.count + ' instance' + (r.count > 1 ? 's' : '') + ')',
        description:  'Severity: ' + r.severity + ' · Estimated ' + hours + ' hr per CD remediation catalog',
        scope:        r.rule_id,
        lowPrice:     lowPrice,
        highPrice:    highPrice,
        hours:        hours,
        priority:     r.severity === 'critical' ? 1 : r.severity === 'warning' ? 2 : 3,
        clientNotes:  '',
        internalNotes: r.source === 'severity_fallback' ? 'No specific time estimate — used severity-band fallback' : '',
        included:     true,
        recurring:    false,
        wpsb_rule_id: r.rule_id,
        wpsb_severity: r.severity,
      };
    });
  }

  /* ── EXPORT ──────────────────────────────────────────────────── */
  /* Everything other scripts reach from this IIFE goes through this one
     global object (e.g. WPSB_Scanner.getScanHandoff('seo')). The property
     names are the public contract — each is a shorthand for the
     same-named binding in this scope, so renaming a binding renames the
     public key too. */
  window.WPSB_Scanner = {
    useScannerData,
    useScannerInfo,
    scanSite,
    normalizeScanData,
    enrichBrandData,
    enrichPagesData,
    extractColorsFromCss,
    extractFontsFromCss,
    extractLogoUrl,
    fetchStylesheetsCss,
    /* Scan handoff — target pages call these on mount */
    getScanHandoff,
    clearScanHandoff,
    /* Saved scans (Tier 2 persistence) */
    saveScanPermanent,
    listSavedScans,
    deleteSavedScan,
    loadSavedScan,
    /* ADA compliance score (v1.1.0 — CD Compliance Framework) */
    categorizeViolation,
    deriveComplianceScore,
    deriveComplianceBreakdown,
    /* ADA remediation estimator (v1.1.0 — Estimator quote auto-gen) */
    estimateRemediationMinutes,
    generateAdaCartItems,
    EMPTY_SCAN_DATA,
    RAILWAY_URL,
  };

  /* One-time load marker; also echoes which backend URL this build targets. */
  console.log('[WPSB Scanner] Data layer loaded (Session 9 — Brand enrichment + ADA compliance score). Railway:', RAILWAY_URL);
})();